# Add trial labels

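Combines the per-subject video start/stop frames with the per-video trial labels and assigns every subject a label for each trial (`win`, `lose`, or the non-competitive condition: `rewarded`, `omission`, `both_rewarded`). The labeled table is saved to `./proc/`.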

In [1]:
import os
import glob
import git
import sys


In [2]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
# import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import h5py
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter


In [3]:
# Find the git repository that contains the current directory (searching parent
# folders as needed) and ask git for the repository's top-level directory.
git_repo = git.Repo(path=".", search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")

In [4]:
# Display the resolved repository root as a sanity check.
git_root

'/blue/npadillacoreano/ryoi360/projects/reward_comp/repos/reward_competition_extention'

In [5]:
# Put the repository's src/ folder at the front of the module search path.
src_dir = os.path.join(git_root, 'src')
sys.path.insert(0, src_dir)

In [6]:
# Default figure size for every plot created in this notebook.
# sns.set('notebook', 'ticks', font_scale=1.2)
mpl.rcParams.update({'figure.figsize': [15, 6]})

## Functions

## Inputs & Data

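- `./data/rce_per_subject_start_stop_video_frame.xlsx`: one row per tracked video segment, with the file path, the start and stop frame, the tracked subject(s), and the box number.
- `./data/rce_pilot_2_per_video_trial_labels.xlsx`: one row per trial, with the video name, the tone/reward frames and timestamps, and the trial condition.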

## Outputs

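The notebook writes one table with a row per subject per trial, including the `trial_label` column, to `./proc/rce2_finalized_trial_labels.xlsx` and `./proc/rce2_finalized_trial_labels.pkl`.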

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

In [7]:
# Inputs and required data loading.
# Input variable names are written in all-caps snake case.
# Once an input is changed or used for processing, the derived
# variables are written in lowercase snake case.
OUTPUT_PREFIX = "rce_pilot_2"
OUTPUT_DIR = "./proc/"  # where data is saved; should always be shown in the inputs
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [8]:
# Pickle file name built from the shared output prefix.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
FULL_LFP_TRACES_PKL = f"{OUTPUT_PREFIX}_03_spectral_trial_labels.pkl"

## Processing

### Looking at when each subject was in each video

In [9]:
# Load the per-subject start/stop frame sheet and keep only rows that name a file.
subject_start_stop_frames = (
    pd.read_excel("./data/rce_per_subject_start_stop_video_frame.xlsx")
    .dropna(subset=["file_path"])
)

- Getting the names of the SLEAP and video files that each subject appears in

In [10]:
# sleap_name: the bare file name taken from the full path.
# video_name: the first two dot-separated pieces of that file name.
file_names = subject_start_stop_frames["file_path"].apply(os.path.basename)
subject_start_stop_frames["sleap_name"] = file_names
subject_start_stop_frames["video_name"] = file_names.apply(lambda name: ".".join(name.split(".")[:2]))

# Frame bounds are used in comparisons later, so store them as integers.
for frame_column in ("start_frame", "stop_frame"):
    subject_start_stop_frames[frame_column] = subject_start_stop_frames[frame_column].astype(int)

In [11]:
# Drop columns that are not used below; errors="ignore" tolerates sheets that lack any of them.
columns_to_discard = ["file_path", "notes", "in_video_subjects"]
subject_start_stop_frames = subject_start_stop_frames.drop(columns=columns_to_discard, errors="ignore")

In [12]:
# List the distinct video names present in the sheet.
subject_start_stop_frames["video_name"].unique()

array(['20221214_125409_om_and_comp_6_1_and_6_3.1',
       '20221215_145401_comp_amd_om_6_1_and_6_3.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.2',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.1',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.2',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.1',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.2',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.3',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.1',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.2',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.1',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.3',
       '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.1',
       '

In [13]:
# Preview the first rows after deriving the name columns.
subject_start_stop_frames.head()

Unnamed: 0,start_frame,stop_frame,tracked_subject,box_number,sleap_name,video_name
0,1,25000,6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.1_su...,20221214_125409_om_and_comp_6_1_and_6_3.1
1,27500,73600,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.2_su...,20221214_125409_om_and_comp_6_1_and_6_3.1
2,51500,76454,6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.1_su...,20221215_145401_comp_amd_om_6_1_and_6_3.1
3,1,48500,6.1_6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.2_su...,20221215_145401_comp_amd_om_6_1_and_6_3.1
4,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...


- Splitting each row into a separate row for each subject in the video

In [14]:
# Turn "6.1_6.3"-style entries into a list with one subject ID per element.
# Missing values are kept as NaN instead of becoming the string "nan", so the
# later dropna on "current_subject" can actually remove rows without a subject.
subject_start_stop_frames["current_subject"] = subject_start_stop_frames["tracked_subject"].apply(
    lambda subjects: str(subjects).split("_") if pd.notna(subjects) else np.nan
)

In [15]:
# One row per subject: each list element in "current_subject" gets its own row.
# The original row index is repeated for rows that were split.
subject_start_stop_frames = subject_start_stop_frames.explode(column="current_subject")

In [16]:
# Preview the first rows after splitting into one row per subject.
subject_start_stop_frames.head()

Unnamed: 0,start_frame,stop_frame,tracked_subject,box_number,sleap_name,video_name,current_subject
0,1,25000,6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.1_su...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.3
1,27500,73600,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.2_su...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.1
1,27500,73600,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.2_su...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.3
2,51500,76454,6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.1_su...,20221215_145401_comp_amd_om_6_1_and_6_3.1,6.3
3,1,48500,6.1_6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.2_su...,20221215_145401_comp_amd_om_6_1_and_6_3.1,6.1


### Loading in trial labels

In [17]:
# Load the per-video trial labels, normalize the column names (trimmed, lowercase),
# and keep only trials that have a condition.
TRIAL_LABELS_DF = (
    pd.read_excel("./data/rce_pilot_2_per_video_trial_labels.xlsx")
    .rename(columns=lambda name: name.strip().lower())
    .dropna(subset=["condition"])
)

In [18]:
# Discard every column whose name contains "unnamed" or "box".
unwanted_columns = [
    name for name in TRIAL_LABELS_DF.columns
    if "unnamed" in name or "box" in name
]
TRIAL_LABELS_DF = TRIAL_LABELS_DF.drop(columns=unwanted_columns, errors="ignore")

- fixing up the data types of the labels

In [19]:
# Cast the timestamp columns to 64-bit integers and the frame columns to int.
TRIAL_LABELS_DF = TRIAL_LABELS_DF.astype(
    {
        "tone_start_timestamp": np.int64,
        "tone_stop_timestamp": np.int64,
        "tone_start_frame": int,
        "reward_start_frame": int,
        "tone_stop_frame": int,
    }
)

# Reduce each video entry to the first two dot-separated pieces of its file name.
TRIAL_LABELS_DF["video_name"] = TRIAL_LABELS_DF["video_name"].apply(
    lambda path: ".".join(os.path.basename(path).split(".")[:2])
)

In [20]:
# Preview the cleaned trial labels.
TRIAL_LABELS_DF.head()

Unnamed: 0,video_name,tone_start_frame,reward_start_frame,tone_stop_frame,condition,competition_closeness,notes,experiment,session_dir,all_subjects,first_timestamp,last_timestamp,tone_start_timestamp,tone_stop_timestamp
1,20230612_101430_standard_comp_to_training_D1_s...,980,1080,1181,1.4,Subj 2 Only,,standard,20230612_101430_standard_comp_to_training_D1_s...,"['1.3', '1.4']",8798886.0,77093151.0,982229,1182226
2,20230612_101430_standard_comp_to_training_D1_s...,3376,3476,3575,1.3,Subj 1 Only,,standard,20230612_101430_standard_comp_to_training_D1_s...,"['1.3', '1.4']",8798886.0,77093151.0,3382227,3582224
3,20230612_101430_standard_comp_to_training_D1_s...,5672,5772,5871,1.4,Subj 2 Only,,standard,20230612_101430_standard_comp_to_training_D1_s...,"['1.3', '1.4']",8798886.0,77093151.0,5682225,5882222
4,20230612_101430_standard_comp_to_training_D1_s...,7468,7568,7668,1.4,Subj 2 Only,,standard,20230612_101430_standard_comp_to_training_D1_s...,"['1.3', '1.4']",8798886.0,77093151.0,7482224,7682221
5,20230612_101430_standard_comp_to_training_D1_s...,8566,8666,8765,1.4,Subj 2 Only,,standard,20230612_101430_standard_comp_to_training_D1_s...,"['1.3', '1.4']",8798886.0,77093151.0,8582220,8782223


- Putting the dataframes together based on shared video name

In [21]:
# Pair every subject row with every trial of the same video (inner join on video_name),
# then discard rows that have no subject.
merged_df = (
    subject_start_stop_frames
    .merge(TRIAL_LABELS_DF, on="video_name", how="inner")
    .dropna(subset=["current_subject"])
)

In [22]:
# Preview the merged subject/trial table.
merged_df.head()

Unnamed: 0,start_frame,stop_frame,tracked_subject,box_number,sleap_name,video_name,current_subject,tone_start_frame,reward_start_frame,tone_stop_frame,condition,competition_closeness,notes,experiment,session_dir,all_subjects,first_timestamp,last_timestamp,tone_start_timestamp,tone_stop_timestamp
0,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,1125,1225,1324,1.1,Subj 1 blocking Subj 2,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,1126742,1326741
1,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,3519,3619,3720,1.2,Subj 2 Only,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,3526740,3726740
2,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,5815,5915,6014,1.2,Subj 2 blocking Subj 1,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,5826740,6026737
3,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,7612,7712,7811,1.2,Subj 2 Only,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,7626736,7826735
4,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,8709,8809,8910,1.2,Subj 2 Only,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,8726734,8926734


- Filtering out all trial rows whose tone start falls outside the frames in which the subject was in the video

In [23]:
# Keep a trial only when its tone starts strictly between the subject's
# start and stop frame for that video.
tone_after_start = merged_df['tone_start_frame'] > merged_df['start_frame']
tone_before_stop = merged_df['tone_start_frame'] < merged_df['stop_frame']
merged_df = merged_df[tone_after_start & tone_before_stop]

In [24]:
# Preview the rows that remain after the frame-range filter.
merged_df.head()

Unnamed: 0,start_frame,stop_frame,tracked_subject,box_number,sleap_name,video_name,current_subject,tone_start_frame,reward_start_frame,tone_stop_frame,condition,competition_closeness,notes,experiment,session_dir,all_subjects,first_timestamp,last_timestamp,tone_start_timestamp,tone_stop_timestamp
20,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,37101,37201,37300,rewarded,,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,37126704,37326704
21,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,38299,38399,38498,rewarded,,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,38326703,38526703
22,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,39397,39497,39597,rewarded,,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,39426702,39626701
23,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,40695,40795,40894,rewarded,,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,40726701,40926700
24,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,41792,41892,41992,rewarded,,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",7977066.0,76318450.0,41826697,42026699


- Removing unnecessary columns

In [25]:
# Drop the helper columns in one call; errors="ignore" skips any that are already absent.
obsolete_columns = [
    "start_frame", "stop_frame",
    "first_timestamp", "last_timestamp",
    "box_1_port_entry_frames", "box_2_port_entry_frames",
    'box_1_port_entry_timestamps', 'box_2_port_entry_timestamps',
]
merged_df = merged_df.drop(columns=obsolete_columns, errors="ignore")


In [26]:
# Order rows by session, then tone onset, then video, and renumber the index from 0.
sort_keys = ["session_dir", "tone_start_timestamp", "video_name"]
merged_df = (
    merged_df
    .sort_values(by=sort_keys)
    .reset_index(drop=True)
)

In [27]:
# Preview the sorted table with the helper columns removed.
merged_df.head()

Unnamed: 0,tracked_subject,box_number,sleap_name,video_name,current_subject,tone_start_frame,reward_start_frame,tone_stop_frame,condition,competition_closeness,notes,experiment,session_dir,all_subjects,tone_start_timestamp,tone_stop_timestamp
0,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.1,1125,1225,1324,1.1,Subj 1 blocking Subj 2,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",1126742,1326741
1,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,1125,1225,1324,1.1,Subj 1 blocking Subj 2,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",1126742,1326741
2,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.1,3519,3619,3720,1.2,Subj 2 Only,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",3526740,3726740
3,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,3519,3619,3720,1.2,Subj 2 Only,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",3526740,3726740
4,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.1,5815,5915,6014,1.2,Subj 2 blocking Subj 1,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",5826740,6026737


## Adding competition labels

In [28]:
# Condition values that are used directly as the trial label
# instead of being converted to "win" / "lose".
non_competitive_labels = [
    "rewarded",
    "omission",
    "both_rewarded",
]

In [29]:
# Compare subjects and conditions as strings so that e.g. 1.1 and "1.1" match.
merged_df = merged_df.astype({"current_subject": str, "condition": str})

In [30]:
# Store the tracked subjects as a list of ID strings ("1.1_1.2" -> ["1.1", "1.2"]).
# Not idempotent: running this cell twice would stringify the list before splitting again.
merged_df["tracked_subject"] = merged_df["tracked_subject"].astype(str).str.split("_")

In [31]:
def _label_trial(row):
    """Return the trial label for one subject/trial row.

    A condition listed in ``non_competitive_labels`` is returned unchanged.
    Otherwise the row is labeled "win" when the condition equals the row's
    ``current_subject`` and "lose" in every other case.
    """
    condition = row["condition"]
    if condition in non_competitive_labels:
        return condition
    if condition == row["current_subject"]:
        return "win"
    return "lose"


merged_df["trial_label"] = merged_df.apply(_label_trial, axis=1)

In [32]:
# Preview the first labeled rows.
merged_df.head()

Unnamed: 0,tracked_subject,box_number,sleap_name,video_name,current_subject,tone_start_frame,reward_start_frame,tone_stop_frame,condition,competition_closeness,notes,experiment,session_dir,all_subjects,tone_start_timestamp,tone_stop_timestamp,trial_label
0,"[1.1, 1.2]",1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.1,1125,1225,1324,1.1,Subj 1 blocking Subj 2,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",1126742,1326741,win
1,"[1.1, 1.2]",1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,1125,1225,1324,1.1,Subj 1 blocking Subj 2,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",1126742,1326741,lose
2,"[1.1, 1.2]",1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.1,3519,3619,3720,1.2,Subj 2 Only,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",3526740,3726740,lose
3,"[1.1, 1.2]",1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.2,3519,3619,3720,1.2,Subj 2 Only,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",3526740,3726740,win
4,"[1.1, 1.2]",1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,1.1,5815,5915,6014,1.2,Subj 2 blocking Subj 1,,standard,20230612_112630_standard_comp_to_training_D1_s...,"['1.1', '1.2']",5826740,6026737,lose


In [33]:
# Preview the last labeled rows.
merged_df.tail()

Unnamed: 0,tracked_subject,box_number,sleap_name,video_name,current_subject,tone_start_frame,reward_start_frame,tone_stop_frame,condition,competition_closeness,notes,experiment,session_dir,all_subjects,tone_start_timestamp,tone_stop_timestamp,trial_label
1258,"[1.2, 2.2]",2,20230630_115506_standard_comp_to_novel_agent_D...,20230630_115506_standard_comp_to_novel_agent_D...,2.2,66104,66204,66303,2.2,After trial,2.2 Chase or bite 1.4 then grab after trial is...,novel,20230630_115506_standard_comp_to_novel_agent_D...,"['1.2', '1.4', '2.1', '2.2']",66073522,66273524,win
1259,"[1.4, 2.1]",1,20230630_115506_standard_comp_to_novel_agent_D...,20230630_115506_standard_comp_to_novel_agent_D...,1.4,68460,68560,68660,2.1,Subj 2 then Subj 1,,novel,20230630_115506_standard_comp_to_novel_agent_D...,"['1.2', '1.4', '2.1', '2.2']",67073535,67273537,lose
1260,"[1.4, 2.1]",1,20230630_115506_standard_comp_to_novel_agent_D...,20230630_115506_standard_comp_to_novel_agent_D...,2.1,68460,68560,68660,2.1,Subj 2 then Subj 1,,novel,20230630_115506_standard_comp_to_novel_agent_D...,"['1.2', '1.4', '2.1', '2.2']",67073535,67273537,win
1261,"[1.2, 2.2]",2,20230630_115506_standard_comp_to_novel_agent_D...,20230630_115506_standard_comp_to_novel_agent_D...,1.2,67102,67202,67301,2.2,After trial,2.2 Chase or bite 1.4 then grab after trial is...,novel,20230630_115506_standard_comp_to_novel_agent_D...,"['1.2', '1.4', '2.1', '2.2']",67073535,67273537,lose
1262,"[1.2, 2.2]",2,20230630_115506_standard_comp_to_novel_agent_D...,20230630_115506_standard_comp_to_novel_agent_D...,2.2,67102,67202,67301,2.2,After trial,2.2 Chase or bite 1.4 then grab after trial is...,novel,20230630_115506_standard_comp_to_novel_agent_D...,"['1.2', '1.4', '2.1', '2.2']",67073535,67273537,win


In [34]:
# Save the finalized labels as a spreadsheet (without the row index). The folder
# comes from OUTPUT_DIR so that changing the configured output location also
# moves this file.
merged_df.to_excel(os.path.join(OUTPUT_DIR, "rce2_finalized_trial_labels.xlsx"), index=False)

In [36]:
# Save the finalized labels as a pickle. The folder comes from OUTPUT_DIR so that
# changing the configured output location also moves this file.
merged_df.to_pickle(os.path.join(OUTPUT_DIR, "rce2_finalized_trial_labels.pkl"))

In [35]:
# NOTE(review): unconditional raise with no message — presumably a deliberate stop so
# that "Run All" halts here; confirm, and remove or replace it before sharing.
raise ValueError()

ValueError: 