# Add trial labels

Brief 1-2 sentence description of notebook.

In [1]:
import os
import glob
import git
import sys


In [2]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
# import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import h5py
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter


In [3]:
git_repo = git.Repo(".", search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")

In [4]:
git_root

'/blue/npadillacoreano/ryoi360/projects/reward_comp/repos/reward_competition_extention'

In [5]:
sys.path.insert(0, os.path.join(git_root, 'src'))

In [6]:
# sns.set('notebook', 'ticks', font_scale=1.2)
mpl.rcParams['figure.figsize'] = [15,6]

## Functions

## Inputs & Data

Explanation of each input and where it comes from.

## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

In [7]:
# Inputs and Required data loading
# input varaible names are in all caps snake case
# Whenever an input changes or is used for processing 
# the vairables are all lower in snake case
OUTPUT_DIR = r"./proc/" # where data is saved should always be shown in the inputs
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_PREFIX = "rce_pilot_2"

In [8]:
FULL_LFP_TRACES_PKL = "{}_03_spectral_trial_labels.pkl".format(OUTPUT_PREFIX)

## Processing

### Looking at when each subject was in each video

In [9]:
subject_start_stop_frames = pd.read_excel("./data/rce_per_subject_start_stop_video_frame.xlsx")
subject_start_stop_frames = subject_start_stop_frames.dropna(subset=["file_path"])

- Getting the name of the SLEAP and video files where each subject was in

In [10]:
subject_start_stop_frames["sleap_name"] = subject_start_stop_frames["file_path"].apply(lambda x: os.path.basename(x))
subject_start_stop_frames["video_name"] = subject_start_stop_frames["file_path"].apply(lambda x: ".".join(os.path.basename(x).split(".")[:2]))
subject_start_stop_frames["start_frame"] = subject_start_stop_frames["start_frame"].astype(int)
subject_start_stop_frames["stop_frame"] = subject_start_stop_frames["stop_frame"].astype(int)

In [11]:
subject_start_stop_frames = subject_start_stop_frames.drop(columns=["file_path", "notes", "in_video_subjects"], errors="ignore")

In [12]:
subject_start_stop_frames["video_name"].unique()

array(['20221214_125409_om_and_comp_6_1_and_6_3.1',
       '20221215_145401_comp_amd_om_6_1_and_6_3.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.2',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.1',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.2',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.1',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.2',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.3',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.1',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.2',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.1',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.3',
       '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.1',
       '

In [13]:
subject_start_stop_frames.head()

Unnamed: 0,start_frame,stop_frame,tracked_subject,box_number,sleap_name,video_name
0,1,25000,6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1
1,27500,73600,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1
2,51500,76454,6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.fixe...,20221215_145401_comp_amd_om_6_1_and_6_3.1
3,1,48500,6.1_6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.fixe...,20221215_145401_comp_amd_om_6_1_and_6_3.1
4,32700,68257,1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...


- Splitting each row into seperate row for each subject in the video

In [14]:
subject_start_stop_frames["current_subject"] = subject_start_stop_frames["tracked_subject"].apply(lambda x: str(x).split("_"))

In [15]:
subject_start_stop_frames = subject_start_stop_frames.explode("current_subject")

In [16]:
subject_start_stop_frames.head()

Unnamed: 0,start_frame,stop_frame,tracked_subject,box_number,sleap_name,video_name,current_subject
0,1,25000,6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.3
1,27500,73600,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.1
1,27500,73600,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.3
2,51500,76454,6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.fixe...,20221215_145401_comp_amd_om_6_1_and_6_3.1,6.3
3,1,48500,6.1_6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.fixe...,20221215_145401_comp_amd_om_6_1_and_6_3.1,6.1


In [17]:
raise ValueError()

ValueError: 

In [None]:
TRIAL_LABELS_DF = pd.read_excel("./data/rce_pilot_2_per_video_trial_labels.xlsx")
TRIAL_LABELS_DF = TRIAL_LABELS_DF.rename(columns={col: col.strip().lower() for col in TRIAL_LABELS_DF.columns})
TRIAL_LABELS_DF = TRIAL_LABELS_DF.dropna(subset=["condition"])
TRIAL_LABELS_DF["tone_start_timestamp"] = TRIAL_LABELS_DF["tone_start_timestamp"].astype(np.int64)
TRIAL_LABELS_DF["tone_stop_timestamp"] = TRIAL_LABELS_DF["tone_stop_timestamp"].astype(np.int64)
TRIAL_LABELS_DF["video_name"] = TRIAL_LABELS_DF["video_name"].apply(lambda x: ".".join(os.path.basename(x).split(".")[:2]))

TRIAL_LABELS_DF["tone_start_frame"] = TRIAL_LABELS_DF["tone_start_frame"].astype(int)
TRIAL_LABELS_DF["reward_start_frame"] = TRIAL_LABELS_DF["reward_start_frame"].astype(int)
TRIAL_LABELS_DF["tone_stop_frame"] = TRIAL_LABELS_DF["tone_stop_frame"].astype(int)


TRIAL_LABELS_DF = TRIAL_LABELS_DF.drop(columns=[col for col in TRIAL_LABELS_DF.columns if "unnamed" in col], errors="ignore")
TRIAL_LABELS_DF = TRIAL_LABELS_DF.drop(columns=[col for col in TRIAL_LABELS_DF.columns if "box" in col], errors="ignore")

In [None]:
TRIAL_LABELS_DF.head()

In [None]:
merged_df = pd.merge(subject_start_stop_frames, TRIAL_LABELS_DF, on="video_name", how="inner")

In [None]:
merged_df = merged_df.dropna(subset=["current_subject"])

merged_df = merged_df[(merged_df['tone_start_frame'] > merged_df['start_frame']) & (merged_df['tone_start_frame'] < merged_df['stop_frame'])]

In [None]:
merged_df = merged_df.drop(columns=["start_frame", "stop_frame"], errors="ignore")
merged_df = merged_df.drop(columns=["first_timestamp", "last_timestamp"], errors="ignore")
merged_df = merged_df.drop(columns=["box_1_port_entry_frames", "box_2_port_entry_frames"], errors="ignore")
merged_df = merged_df.drop(columns=['box_1_port_entry_timestamps', 'box_2_port_entry_timestamps'], errors="ignore")


In [None]:
merged_df = merged_df.sort_values(by=["session_dir", "current_subject", "tone_start_timestamp"]).reset_index(drop=True)

In [None]:
merged_df.columns

In [None]:
merged_df.head()

In [None]:
raise ValueError()

## Adding competition labels

In [None]:
non_competitive_labels = ['rewarded', 'omission', 'both_rewarded']

In [None]:
merged_df["current_subject"] = merged_df["current_subject"].astype(str)
merged_df["condition"] = merged_df["condition"].astype(str)

In [None]:
merged_df["trial_label"] = merged_df.apply(lambda x: x["condition"] if x["condition"] in non_competitive_labels else ("win" if x["condition"] == x["current_subject"] else "lose"), axis=1)

In [None]:
merged_df

In [None]:
merged_df.to_excel("rce2_finalized_trial_labels.xlsx", index=False)

# TODO
- Merge recording file name
- Make a dictionary of trial labels 
- Add box number to sheet

In [None]:
trodes_metadata_df = pd.read_pickle("./rce_pilot_2_00_trodes_metadata.pkl")
trodes_metadata_df["video_name"] = trodes_metadata_df["video_name"].apply(lambda x: ".".join(os.path.basename(x).split(".")[:2]))

In [None]:
trodes_metadata_df = trodes_metadata_df.drop(columns = ["first_timestamp", "last_timestamp", "tone_timestamps", "tone_frames"], errors="ignore")

In [None]:
trodes_metadata_df = trodes_metadata_df.drop(columns=["all_subjects"], errors="ignore")

In [None]:
trodes_metadata_df.head()

In [None]:
final_trials_df = pd.merge(trodes_metadata_df, merged_df, on=["video_name", "current_subject", "session_dir"], how="inner")

In [None]:
final_trials_df

In [None]:
final_trials_df["tone_timestamps"] = final_trials_df.apply(lambda x: [x["tone_start_timestamp"], x["tone_stop_timestamp"]], axis=1)

In [None]:
final_trials_df["tone_frames"] = final_trials_df.apply(lambda x: [x["tone_start_frame"], x["tone_stop_frame"]], axis=1)

In [None]:
final_trials_df = final_trials_df.drop(columns=["tone_start_timestamp", "tone_stop_timestamp", "condition", "tone_start_frame", "tone_stop_frame", "tracked_subject", "reward_start_frame", "notes"], errors="ignore")

In [None]:
grouping_columns = ["session_dir", "current_subject", "trial_label", "experiment"]

In [None]:
non_grouping_columns = set(final_trials_df.columns) - set(grouping_columns)

In [None]:
final_trials_df["tone_timestamps"].iloc[0]

In [None]:
grouped_finals_trials_df = final_trials_df.groupby(grouping_columns).agg({
    col: (lambda col: list(col)) if col in ['tone_timestamps', 'tone_frames'] else 'first' for col in non_grouping_columns}).reset_index()

In [None]:
grouped_finals_trials_df["tone_frames"] = grouped_finals_trials_df["tone_frames"].apply(lambda x: np.array(x))
grouped_finals_trials_df["tone_timestamps"] = grouped_finals_trials_df["tone_timestamps"].apply(lambda x: np.array(x))

In [None]:
grouped_finals_trials_df = grouped_finals_trials_df.drop(columns=["competition_closeness"], errors="ignore")   

In [None]:
grouped_finals_trials_df.head()

In [None]:
grouped_finals_trials_df["tone_timestamps"] = grouped_finals_trials_df["tone_timestamps"].apply(lambda x: x // 20)

In [None]:
grouped_finals_trials_df["trial_label"].unique()

In [None]:
dict_from_df = {k1: {k2: v for k2, v in zip(df['trial_label'], df['tone_timestamps'])} for k1, df in grouped_finals_trials_df.groupby('recording')}

In [None]:
dict_from_df

In [None]:
import pickle


In [None]:
            
with open('recording_to_label_to_timestamps.pkl', 'wb') as f:
    pickle.dump(dict_from_df, f)

In [None]:
merged_df.to_pickle("rce2_finalized_trial_labels.pkl")

In [None]:
grouped_finals_trials_df.to_pickle("rce2_grouped_finalized_trial_labels.pkl")