# Extract data summary

This notebook uses the `TrialProcessor` class to extract various metrics for all trials.

This came from `fig_extract_data_summary_20230417.ipynb` in the original repo.

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

from pp_utils.core import generate_data_path_dict
from pp_utils.file_handling import df_main_loader
from pp_utils.trial_processor import TrialProcessor
from pp_utils.misc import interp_xy

import pickle

## Set paths and load main info df

In [2]:
data_path = generate_data_path_dict(Path("../data_processed/"))
raw_path = Path("../data_raw/")

### Output path

In [3]:
output_path = data_path["main"] / "data_summary"
if not output_path.exists():
    output_path.mkdir()

### Load main dataframe

In [4]:
df_main = df_main_loader(
    folder=data_path["info_csv"], filename="main_info_append_09.csv",
)

## Processing params

In [5]:
from pp_utils.core import MISC_PARAMS, HYDRO_PARAMS, ENV_PARAMS, SCAN_PARAMS

In [6]:
MISC_PARAMS

{'th_RL': 140,
 'time_binning_delta': 0.05,
 'buzz_reg_switch': 0.013,
 'num_buzz_for_onset': 30,
 'dist_max': ('DTAG_dist_elliptical', 12),
 'dist_min': ('ROSTRUM_dist_to_target', 0.1)}

In [7]:
HYDRO_PARAMS

{'bkg_len_sec': 3.2e-05,
 'clk_sel_len_sec': 0.000128,
 'perc_before_pk': 30,
 'hydro_sens': -211,
 'recording_gain': 40}

In [8]:
ENV_PARAMS

{'frequency': 130000.0,
 'temperature': 16,
 'salinity': 28,
 'pressure': 1,
 'pH': 8,
 'absorption_formula_source': 'FG'}

In [9]:
SCAN_PARAMS

{'RL_tolerance': 5,
 'th_num_clk': 5,
 'true_scan_th_RL_diff': 5,
 'true_scan_max_num_click_has_RL_diff': 3}

## Extract data summary

In [10]:
SCAN_FIELDS = ("scan_ch0", "scan_ch1", "scan_combined")
TIME_FIELDS = (
    "time_decision", "time_buzz_onset_dtag", "time_buzz_onset_hydro",
    "time_touch", "time_last_nonchosen",
)
RANGE_FIELDS = (
    "range_decision_tar", "range_buzz_onset_dtag_tar", "range_buzz_onset_hydro_tar",
    "range_decision_clu", "range_buzz_onset_dtag_clu", "range_buzz_onset_hydro_clu",
)
ANGLE_FIELDS = (
    "angle_range_target", "angle_range_clutter", "angle_last_scan",
)

In [11]:
df_track_all = []
df_track_portion_all = []
time_all = []
range_all = []
scan_num_all = []
angle_all = []

In [15]:
# Loop through all trials
for trial_idx in df_main.index[2:3]:

    # Init entry
    time_dict = dict.fromkeys(TIME_FIELDS)
    range_dict = dict.fromkeys(RANGE_FIELDS)
    angle_dict = dict.fromkeys(ANGLE_FIELDS)
    scan_dict = dict.fromkeys(SCAN_FIELDS)
    track_all_entry = None
    track_portion_entry = None
    
    # Process trial
    tp = TrialProcessor(df_main, trial_idx, data_path, raw_path)
    
    if tp.trial_usable is True:
        
        # Add track and hydrophone features
        tp.add_track_features()
        tp.add_hydro_features()
        tp.add_SNR_p2p(hydro_params=HYDRO_PARAMS)
        tp.add_RL_ASL_pointEL(hydro_params=HYDRO_PARAMS, env_params=ENV_PARAMS)
        tp.add_before_touch_to_all_dfs()

        # Scan
        tp.get_hydro_scan_num(th_RL=MISC_PARAMS["th_RL"], scan_params=SCAN_PARAMS)
        df_scan_ch0 = tp.sort_df_scan_to_channel(ch=0)
        df_scan_ch1 = tp.sort_df_scan_to_channel(ch=1)

        # Decision
        decision_click = tp.decision_hydro_click_from_scan()

        # Buzz onset
        buzz_onset_dtag = tp.get_dtag_buzz_onset(
            buzz_reg_switch=MISC_PARAMS["buzz_reg_switch"],
            num_buzz_for_onset=MISC_PARAMS["num_buzz_for_onset"]
        )
        buzz_onset_hydro = tp.get_hydro_buzz_onset(
            buzz_reg_switch=MISC_PARAMS["buzz_reg_switch"],
            num_buzz_for_onset=MISC_PARAMS["num_buzz_for_onset"]
        )

        # Positions
        decision_pos = interp_xy(decision_click["time_corrected"], tp.df_track)
        buzz_onset_dtag_pos = interp_xy(buzz_onset_dtag["time_corrected"], tp.df_track)
        buzz_onset_hydro_pos = interp_xy(buzz_onset_hydro["time_corrected"], tp.df_track)
        
        # Inspection angle
        angle_ch0_in_cam, angle_ch1_in_cam = tp.get_inspection_angle_in_view(
            time_stop=decision_click["time_corrected"],
            th_RL=MISC_PARAMS["th_RL"],
            time_binning_delta=MISC_PARAMS["time_binning_delta"]
        )
        
        # Last scan start/end time
        tp.get_timing_last_scan_of_nonselect()

    
        # Collect info for each trial -------------------------------

        track_all_entry = tp.df_track
        track_portion_entry = tp.get_desired_track_portion(
            dist_max=MISC_PARAMS["dist_max"], dist_min=MISC_PARAMS["dist_min"]
        )
        
        scan_dict.update({
            "scan_ch0": len(df_scan_ch0),      # scan toward ch0
            "scan_ch1": len(df_scan_ch1),      # scan toward ch1
            "scan_combined": len(tp.df_scan),  # all scans across 2 ch
        })

        time_dict.update({
            "time_decision": decision_click["time_corrected"],  # decision time
            "time_buzz_onset_dtag": buzz_onset_dtag["time_corrected"],  # buzz onset identified in dtag
            "time_buzz_onset_hydro": buzz_onset_hydro["time_corrected"],  # buzz onset identified in hydro
            "time_touch": tp.df_dtag[tp.df_dtag["before_touch"]].iloc[-1]["time_corrected"],  # touch time
            "time_last_nonchosen": tp.duration_last_scan_of_nonselect(),  # last scan duration
        })

        if tp.trial_series["TARGET_ANGLE"][:2] == "TC":
            pos_tar, pos_clu = [0, -1], [0, 0]
        else:
            pos_tar, pos_clu = [0, 0], [0, -1]
        range_dict.update({
            "range_decision_tar": np.linalg.norm(decision_pos - pos_tar),
            "range_buzz_onset_dtag_tar": np.linalg.norm(buzz_onset_dtag_pos - pos_tar),
            "range_buzz_onset_hydro_tar": np.linalg.norm(buzz_onset_hydro_pos - pos_tar),
            "range_decision_clu": np.linalg.norm(decision_pos - pos_clu),
            "range_buzz_onset_dtag_clu": np.linalg.norm(buzz_onset_dtag_pos - pos_clu),
            "range_buzz_onset_hydro_clu": np.linalg.norm(buzz_onset_hydro_pos - pos_clu),
        })
        
        angle_dict.update({
            "angle_range_target": (
                None if angle_ch0_in_cam.size == 0 else angle_ch0_in_cam.max() - angle_ch0_in_cam.min() 
            ),
            "angle_range_clutter": (
                None if angle_ch1_in_cam.size == 0 else angle_ch1_in_cam.max() - angle_ch1_in_cam.min() 
            ),
            "angle_last_scan": tp.angle_span_last_scan_of_nonselect(),
        })
        
    # Append to list
    df_track_all.append(track_all_entry)
    df_track_portion_all.append(track_portion_entry)
    scan_num_all.append(list(scan_dict.values()))
    time_all.append(list(time_dict.values()))
    range_all.append(list(range_dict.values()))
    angle_all.append(list(angle_dict.values()))


------ trial 2 ------------
20190625_s1_t1
Some data missing in this trial, skipping...


## Organize dataframe

In [26]:
df_scan_all = pd.DataFrame(scan_num_all, columns=SCAN_FIELDS)
df_time_all = pd.DataFrame(time_all, columns=TIME_FIELDS)
df_range_all = pd.DataFrame(range_all, columns=RANGE_FIELDS)
df_angle_all = pd.DataFrame(angle_all, columns=ANGLE_FIELDS)

In [27]:
df_merge_all = pd.concat((df_scan_all, df_time_all, df_range_all, df_angle_all), axis=1)

In [28]:
df_merge_all[["SPHEROID", "CHOICE", "TARGET_ANGLE", "fname_prefix"]] = df_main[
    ["SPHEROID", "CHOICE", "TARGET_ANGLE", "fname_prefix"]
]

In [29]:
df_merge_all

Unnamed: 0,scan_ch0,scan_ch1,scan_combined,time_decision,time_buzz_onset_dtag,time_buzz_onset_hydro,time_touch,time_last_nonchosen,range_decision_tar,range_buzz_onset_dtag_tar,...,range_decision_clu,range_buzz_onset_dtag_clu,range_buzz_onset_hydro_clu,angle_range_target,angle_range_clutter,angle_last_scan,SPHEROID,CHOICE,TARGET_ANGLE,fname_prefix
0,,,,,,,,,,,...,,,,,,,X,-1,TX1,20190625_s1_t0
1,,,,,,,,,,,...,,,,,,,X,-1,TX1,20190625_s1_t0
2,7.0,7.0,13.0,530.550648,531.263587,531.266230,532.564810,0.268760,1.083544,0.718888,...,1.555132,1.422822,1.421993,17.458140,54.204238,5.168805,L,1,TC1,20190625_s1_t1
3,5.0,5.0,9.0,676.986342,677.812622,677.813616,679.845126,0.304592,1.252419,0.793524,...,1.352446,0.930691,0.930646,16.727133,7.348420,1.777283,L,1,CT1,20190625_s1_t2
4,7.0,6.0,11.0,764.885338,766.960336,766.961776,768.832284,0.366148,2.510579,0.984719,...,2.441380,1.450829,1.450659,5.177008,11.058150,1.648957,L,1,TC1,20190625_s1_t3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,6.0,5.0,9.0,771.450836,772.415519,772.453444,774.404724,0.423488,1.811494,1.017214,...,1.461558,1.142615,1.141121,22.621670,8.123448,7.569817,M,1,TC1,20190704_s3_t8
243,7.0,6.0,10.0,830.557031,830.896123,830.894679,832.663858,0.356172,1.222779,0.956701,...,1.062928,0.852896,0.853144,26.562826,3.005599,0.876847,M,1,CT3,20190704_s3_t9
244,4.0,4.0,7.0,884.984590,885.468077,885.475970,887.818785,1.071250,1.145556,1.003286,...,1.379984,1.042868,1.039515,6.361237,16.815334,1.342138,M,0,TC3,20190704_s3_t10
245,4.0,4.0,7.0,932.993872,934.163839,934.192478,937.301557,0.572210,2.020452,1.198936,...,1.929837,1.033472,1.008076,13.854835,5.841551,4.376336,M,0,TC3,20190704_s3_t11


### Select which buzz to use

In [30]:
def clean_buzz(x, clean_type):
    # For trials in 20190618_s1, use hydro buzz onset
    if x.name >=89 and x.name <=98:
        if clean_type == "time":
            return x["time_buzz_onset_hydro"]
        elif clean_type == "range_target":
            return x["range_buzz_onset_hydro_tar"]
        elif clean_type == "range_clutter":
            return x["range_buzz_onset_hydro_clu"]
        else:
            raise ValueError("Provided type is not supported!")
    
    # For all other trials: use whichever earlier
    if x["time_buzz_onset_hydro"] < x["time_buzz_onset_dtag"]:
        if clean_type == "time":
            return x["time_buzz_onset_hydro"]
        elif clean_type == "range_target":
            return x["range_buzz_onset_hydro_tar"]
        elif clean_type == "range_clutter":
            return x["range_buzz_onset_hydro_clu"]
        else:
            raise ValueError("Provided type is not supported!")
    else:
        if clean_type == "time":
            return x["time_buzz_onset_dtag"]
        elif clean_type == "range_target":
            return x["range_buzz_onset_dtag_tar"]
        elif clean_type == "range_clutter":
            return x["range_buzz_onset_dtag_clu"]
        else:
            raise ValueError("Provided type is not supported!")


In [31]:
df_merge_all["time_buzz_onset_clean"] = df_merge_all.apply(clean_buzz, clean_type="time", axis=1)

In [32]:
df_merge_all["time_decision2touch"] = df_merge_all["time_touch"] - df_merge_all["time_decision"]
df_merge_all["time_buzz2touch"] = df_merge_all["time_touch"] - df_merge_all["time_buzz_onset_clean"]

In [33]:
df_merge_all["range_buzz_onset_tar_clean"] = df_merge_all.apply(clean_buzz, clean_type="range_target", axis=1)
df_merge_all["range_buzz_onset_clu_clean"] = df_merge_all.apply(clean_buzz, clean_type="range_clutter", axis=1)

In [34]:
def clean_range_buzz_onset(x):
    if x["time_decision2touch"] < x["time_buzz2touch"]:
        # Decision AFTER buzz onset (buzzing at clutter first)
        return x["range_buzz_onset_clu_clean"]
    else:
        # Decision BEFORE buzz onset (buzzing at the target)
        return x["range_buzz_onset_tar_clean"]

In [35]:
df_merge_all["range_buzz_onset_closest"] = df_merge_all.apply(
    clean_range_buzz_onset, axis=1
)

### Export to CSV

In [36]:
# summary
df_merge_all.to_csv(output_path / "data_summary_all_20230426.csv")

# full tracks
with open(output_path / "track_all.pickle", "wb") as fileout:
    pickle.dump(df_track_all, fileout)

# track portions used for clustering
with open(output_path / "track_portion_all.pickle", "wb") as fileout:
    pickle.dump(df_track_portion_all, fileout)