# SLEAP Distance Calculation

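This notebook joins per-recording LFP/spectral data with SLEAP pose tracking: it matches each recording to its ID-corrected SLEAP file, converts the pose coordinates from pixels to centimetres using the labelled box corners, and exports the combined table as a pickle.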

In [1]:
import os
import glob
import git
import sys


In [2]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
# import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import h5py
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter


In [3]:
# Find the enclosing git repository by searching upward from the current directory,
# then ask git for the absolute path of the repository's top-level directory.
git_repo = git.Repo(".", search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")

In [4]:
git_root

'/blue/npadillacoreano/ryoi360/projects/reward_comp/repos/reward_competition_extention'

In [5]:
sys.path.insert(0, os.path.join(git_root, 'src'))

In [6]:
import utilities.helper
import sleap.process_pose

In [7]:
# sns.set('notebook', 'ticks', font_scale=1.2)
mpl.rcParams['figure.figsize'] = [15,6]

## Inputs & Data

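The inputs are: the LFP/spectral pickle (`./proc/rce_pilot_2_02_full_spectral.pkl`), the per-subject start/stop frame spreadsheet (`rce_per_subject_start_stop_video_frame.xlsx`), and the directory of ID-corrected SLEAP `.h5` files (`SLEAP_DIR`), plus the box dimensions used to convert pixels to centimetres.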

In [8]:
# Inputs and required data loading.
# Input variable names are in all-caps snake case.
# Whenever an input changes or is used for processing,
# the derived variables are in lower snake case.

# Index used to select the thorax along the second axis of a subject's location array.
THORAX_INDEX = 1
# TONE_TIMESTAMP_DF = pd.read_csv("./proc/rce_tone_timestamp.csv", index_col=0)
# VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.read_excel("../../proc/video_to_frame_and_subject.xlsx")
# VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.read_pickle("./proc/rce2_spectral_granger.pkl")
# NOTE: pickle files can execute arbitrary code when loaded; only load files produced by this project.
VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.read_pickle("./proc/rce_pilot_2_02_full_spectral.pkl")

# Remove the timestamp-file suffix from each video name.
# str.strip() must not be used for this: it treats its argument as a SET of characters
# and trims any of them from both ends, so it can eat into the real name.
VIDEO_NAME_SUFFIX = ".videoTimeStamps.cameraHWSync"
VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"].apply(
    lambda x: x[: -len(VIDEO_NAME_SUFFIX)] if x.endswith(VIDEO_NAME_SUFFIX) else x
)

# SLEAP_DIR = os.path.join(git_root, "proc/sleap") 
# SLEAP_DIR = "/scratch/back_up/reward_competition_extention/final_proc/id_corrected"
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
SLEAP_DIR = "/blue/npadillacoreano/ryoi360/projects/reward_comp/final_proc/id_corrected"

OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs
# Physical box dimensions; divided by the measured pixel width/height below
# to obtain the pixel-to-cm ratios (so these are assumed to be in cm).
MED_PC_WIDTH = 29.5
MED_PC_HEIGHT = 24
# NOTE(review): the three constants below are not referenced in the visible cells of this notebook.
FRAME_RATE = 22
WINDOW_SIZE = 25
DISTANCE_THRESHOLD = 2

In [9]:
# Per-subject start/stop video frames; rows with no file_path are dropped.
# NOTE(review): START_STOP_FRAME_DF is assigned again further down from
# "./data/rce_per_subject_start_stop_video_frame.xlsx", which replaces this value.
# Confirm which of the two paths is the intended source.
START_STOP_FRAME_DF = pd.read_excel("../2024_02_13_rce2_spectral_analysis/data/rce_per_subject_start_stop_video_frame.xlsx").dropna(subset=["file_path"])

In [10]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.columns

Index(['cohort', 'session_dir', 'tone_frames', 'box_1_port_entry_frames',
       'box_2_port_entry_frames', 'video_name', 'session_path', 'recording',
       'current_subject', 'subject', 'all_subjects', 'first_timestamp',
       'last_timestamp', 'video_timestamps', 'tone_timestamps',
       'box_1_port_entry_timestamps', 'box_2_port_entry_timestamps',
       'lfp_timestamps', 'mPFC_lfp_trace', 'MD_lfp_trace', 'LH_lfp_trace',
       'BLA_lfp_trace', 'vHPC_lfp_trace',
       'mPFC_power_all_frequencies_all_windows',
       'MD_power_all_frequencies_all_windows',
       'LH_power_all_frequencies_all_windows',
       'BLA_power_all_frequencies_all_windows',
       'vHPC_power_all_frequencies_all_windows', 'power_timestamps',
       'power_calculation_frequencies',
       'BLA_LH_coherence_all_frequencies_all_windows',
       'BLA_MD_coherence_all_frequencies_all_windows',
       'BLA_mPFC_coherence_all_frequencies_all_windows',
       'BLA_vHPC_coherence_all_frequencies_all_windows',
   

## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

In [11]:
# Output settings.
# Input variable names are in all-caps snake case.
# Whenever an input changes or is used for processing,
# the derived variables are in lower snake case.
# NOTE: OUTPUT_DIR was already set to "./proc" in the inputs cell; this reassigns it (with a trailing slash).
OUTPUT_DIR = r"./proc/" # where data is saved should always be shown in the inputs
# Create the output directory if it does not exist yet.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Prefix shared by the files this notebook writes.
OUTPUT_PREFIX = "rce_pilot_2"

In [12]:
# File name (without directory) of the exported pickle; joined with OUTPUT_DIR at export time.
FULL_LFP_TRACES_PKL = "{}_full_spectral_and_sleap_poses.pkl".format(OUTPUT_PREFIX)

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

# Getting the videos where the subject is in the recording

### Looking at when each subject was in each video

In [13]:
# Load the per-subject start/stop frame spreadsheet and keep only the rows
# that point at a SLEAP file (rows with a missing file_path are discarded).
START_STOP_FRAME_DF = (
    pd.read_excel("./data/rce_per_subject_start_stop_video_frame.xlsx")
    .dropna(subset=["file_path"])
)

- Getting the names of the SLEAP and video files in which each subject appears

In [14]:
def _video_name_from_path(path):
    """Return the first two dot-separated pieces of the file name in ``path``, re-joined with a dot."""
    name_pieces = os.path.basename(path).split(".")
    return ".".join(name_pieces[:2])


# File name of the SLEAP file, without its directory.
START_STOP_FRAME_DF["sleap_name"] = START_STOP_FRAME_DF["file_path"].map(os.path.basename)
# Video name shared with the LFP table (used later as a merge key).
START_STOP_FRAME_DF["video_name"] = START_STOP_FRAME_DF["file_path"].map(_video_name_from_path)
# Frame numbers are stored as integers so they can be used as indices.
for frame_column in ("start_frame", "stop_frame"):
    START_STOP_FRAME_DF[frame_column] = START_STOP_FRAME_DF[frame_column].astype(int)

In [15]:
START_STOP_FRAME_DF = START_STOP_FRAME_DF.drop(columns=["file_path", "notes"], errors="ignore")

In [16]:
START_STOP_FRAME_DF["video_name"].unique()

array(['20221214_125409_om_and_comp_6_1_and_6_3.1',
       '20221215_145401_comp_amd_om_6_1_and_6_3.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.2',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.1',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.2',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.1',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.2',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.3',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.1',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.2',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.1',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.3',
       '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.1',
       '

In [17]:
START_STOP_FRAME_DF.head()

Unnamed: 0,start_frame,stop_frame,tracked_subject,in_video_subjects,box_number,sleap_name,video_name
0,1,25000,6.3,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1
1,27500,73600,6.1_6.3,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1
2,51500,76454,6.3,6.1_6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.fixe...,20221215_145401_comp_amd_om_6_1_and_6_3.1
3,1,48500,6.1_6.3,6.1_6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.fixe...,20221215_145401_comp_amd_om_6_1_and_6_3.1
4,32700,68257,1.2,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...


- Splitting each row into a separate row for each subject in the video

In [18]:
def _split_subject_ids(tracked_subject):
    """Turn a tracked-subject entry such as ``"6.1_6.3"`` (or a bare number) into a list of ID strings."""
    return str(tracked_subject).split("_")


# One list of subject IDs per row; the next cell explodes it into one row per subject.
START_STOP_FRAME_DF["current_subject"] = START_STOP_FRAME_DF["tracked_subject"].map(_split_subject_ids)

In [19]:
START_STOP_FRAME_DF = START_STOP_FRAME_DF.explode("current_subject")

In [20]:
START_STOP_FRAME_DF.head()

Unnamed: 0,start_frame,stop_frame,tracked_subject,in_video_subjects,box_number,sleap_name,video_name,current_subject
0,1,25000,6.3,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.3
1,27500,73600,6.1_6.3,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.1
1,27500,73600,6.1_6.3,6.1_6.3,1,20221214_125409_om_and_comp_6_1_and_6_3.1.fixe...,20221214_125409_om_and_comp_6_1_and_6_3.1,6.3
2,51500,76454,6.3,6.1_6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.fixe...,20221215_145401_comp_amd_om_6_1_and_6_3.1,6.3
3,1,48500,6.1_6.3,6.1_6.3,1,20221215_145401_comp_amd_om_6_1_and_6_3.1.fixe...,20221215_145401_comp_amd_om_6_1_and_6_3.1,6.1


## Putting together LFP and video start/stop

In [21]:
START_STOP_FRAME_DF["video_name"].unique()[:5]

array(['20221214_125409_om_and_comp_6_1_and_6_3.1',
       '20221215_145401_comp_amd_om_6_1_and_6_3.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.2',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.1'],
      dtype=object)

In [22]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"].unique()[:5]

array(['20230612_101430_standard_comp_to_training_D1_subj_1-4_and_1-3.1',
       '20230612_101430_standard_comp_to_training_D1_subj_1-4_and_1-3.2',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.2',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.1'],
      dtype=object)

In [23]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["current_subject"].unique()

array(['1.3', '1.4', '1.1', '1.2'], dtype=object)

In [24]:
START_STOP_FRAME_DF["current_subject"].unique()

array(['6.3', '6.1', '1.2', '1.1', '1.4', '2.2', '2.1'], dtype=object)

In [25]:
# Keep only the (video, subject) pairs present in BOTH tables: the LFP/spectral
# table and the start/stop frame table. Pairs found in just one are dropped.
LFP_AND_SLEAP_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF.merge(
    START_STOP_FRAME_DF,
    on=["video_name", "current_subject"],
    how="inner",
)

In [26]:
LFP_AND_SLEAP_DF.head()

Unnamed: 0,cohort,session_dir,tone_frames,box_1_port_entry_frames,box_2_port_entry_frames,video_name,session_path,recording,current_subject,subject,...,mPFC_vHPC_granger_all_frequencies_all_windows,vHPC_mPFC_granger_all_frequencies_all_windows,granger_timestamps,granger_calculation_frequencies,start_frame,stop_frame,tracked_subject,in_video_subjects,box_number,sleap_name
0,2,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.1,1.1,...,"[[0.1587, 0.256, 0.2253, 0.1144, 0.04657, 0.24...","[[0.386, 0.0818, 0.07935, 0.03128, 0.2976, 1.4...","[10000, 20000, 30000, 40000, 50000, 60000, 700...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...",1,32300,1.1_1.2,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...
1,2,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 930], [930, 948...","[[32974, 32976], [33201, 33207], [33208, 33211...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.1,1.1,...,"[[0.1587, 0.256, 0.2253, 0.1144, 0.04657, 0.24...","[[0.386, 0.0818, 0.07935, 0.03128, 0.2976, 1.4...","[10000, 20000, 30000, 40000, 50000, 60000, 700...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...",33000,68212,1.1,1.1_1.2,2,20230612_112630_standard_comp_to_training_D1_s...
2,2,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.2,1.2,...,"[[0.003847, 0.1844, 0.7407, 1.28, 0.6084, 0.62...","[[0.6704, 1.331, 0.391, 0.0775, 0.07806, 0.180...","[10000, 20000, 30000, 40000, 50000, 60000, 700...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...",32700,68257,1.2,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...
3,2,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.2,1.2,...,"[[0.003847, 0.1844, 0.7407, 1.28, 0.6084, 0.62...","[[0.6704, 1.331, 0.391, 0.0775, 0.07806, 0.180...","[10000, 20000, 30000, 40000, 50000, 60000, 700...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...",1,32300,1.1_1.2,1.1_1.2,1,20230612_112630_standard_comp_to_training_D1_s...
4,2,20230613_105657_standard_comp_to_training_D2_s...,"[[916, 1117], [3312, 3513], [5608, 5808], [740...","[[49, 67], [70, 79], [360, 366], [460, 469], [...","[[33601, 33798], [34108, 34165], [34166, 34179...",20230613_105657_standard_comp_to_training_D2_s...,/scratch/back_up/reward_competition_extention/...,20230613_105657_standard_comp_to_training_D2_s...,1.1,1.1,...,"[[2.346, 0.00597, 0.05408, 0.03165, 0.1912, 0....","[[0.0721, 0.717, 1.041, 0.581, 0.3962, 0.6646,...","[10000, 20000, 30000, 40000, 50000, 60000, 700...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...",33400,68332,1.1,1.1_1.4,1,20230613_105657_standard_comp_to_training_D2_s...


In [30]:
LFP_AND_SLEAP_DF["video_timestamps"].apply(lambda x: x.shape).head()

0    (68258,)
1    (68213,)
2    (68258,)
3    (68258,)
4    (68333,)
Name: video_timestamps, dtype: object

# Reading in the h5 files between recordings

In [45]:
# For each SLEAP file name, collect the list of matching paths under SLEAP_DIR.
# NOTE(review): glob.glob is called without recursive=True, so "**" behaves like "*"
# and only files exactly one directory below SLEAP_DIR can match.
LFP_AND_SLEAP_DF["sleap_glob"] = LFP_AND_SLEAP_DF["sleap_name"].apply(lambda x: glob.glob(os.path.join(SLEAP_DIR, "**", x)))


In [46]:
LFP_AND_SLEAP_DF["sleap_name"].iloc[16]

'20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.1.2_subj.id_corrected.h5'

In [49]:
# Discard rows whose SLEAP file could not be found, then renumber the rows
# from zero so later positional lookups and concatenations line up.
has_sleap_file = LFP_AND_SLEAP_DF["sleap_glob"].map(len) >= 1
LFP_AND_SLEAP_DF = LFP_AND_SLEAP_DF[has_sleap_file].reset_index(drop=True)




In [50]:
# Use the first glob match as the SLEAP file path (rows with no match were removed above).
# NOTE(review): if a name matches several files, the one used is simply whichever glob listed first.
LFP_AND_SLEAP_DF["sleap_path"] = LFP_AND_SLEAP_DF["sleap_glob"].apply(lambda x: x[0])

In [None]:
# Load each SLEAP file with the project helper sleap.process_pose.extract_sleap_data.
# The following cells read the "locations" and "track_names" entries from its result.
LFP_AND_SLEAP_DF["all_sleap_data"] = LFP_AND_SLEAP_DF["sleap_path"].apply(lambda x: sleap.process_pose.extract_sleap_data(x))


In [None]:
# Pose coordinates taken from the loaded SLEAP data.
# NOTE(review): this column is assigned again a few cells below from get_sleap_tracks_from_h5.
LFP_AND_SLEAP_DF["locations"] = LFP_AND_SLEAP_DF["all_sleap_data"].apply(lambda x: x["locations"])

In [None]:
# Track names taken from the loaded SLEAP data.
# NOTE(review): this column is assigned again a few cells below from get_sleap_track_names_from_h5.
LFP_AND_SLEAP_DF["track_names"] = LFP_AND_SLEAP_DF["all_sleap_data"].apply(lambda x: x["track_names"])

In [None]:
LFP_AND_SLEAP_DF["track_names"]

In [None]:
# NOTE(review): "locations" and "track_names" were already filled from "all_sleap_data" above;
# this cell replaces both by re-reading each file. Confirm which of the two sources should be kept.
# Getting the coordinates of all the body parts for all the animals for the entire recording
LFP_AND_SLEAP_DF["locations"] = LFP_AND_SLEAP_DF["sleap_path"].apply(lambda x: sleap.process_pose.get_sleap_tracks_from_h5(x))
# Getting the name of the tracks which correspond to the animal id
LFP_AND_SLEAP_DF["track_names"] = LFP_AND_SLEAP_DF["sleap_path"].apply(lambda x: sleap.process_pose.get_sleap_track_names_from_h5(x))

In [None]:
LFP_AND_SLEAP_DF["locations"].iloc[0].shape

In [None]:
LFP_AND_SLEAP_DF["track_names"].iloc[0]

In [None]:
LFP_AND_SLEAP_DF["subject"]

In [None]:
LFP_AND_SLEAP_DF["track_names"]

In [None]:
# Getting the index of each subject in the track list.
# Each row gets a dict {subject: position of that subject in track_names}.
# Subjects listed in all_subjects but absent from track_names are silently left out.
LFP_AND_SLEAP_DF["subject_to_index"] = LFP_AND_SLEAP_DF.apply(lambda x: {k: x["track_names"].index(k) for k in x["all_subjects"] if k in x["track_names"]}, axis=1)

In [None]:
LFP_AND_SLEAP_DF["subject_to_index"]

In [None]:
# At this point the column is only a per-row copy of subject_to_index (subject -> track index);
# a later cell reassigns it with the actual coordinate arrays.
LFP_AND_SLEAP_DF["subject_to_tracks"] = LFP_AND_SLEAP_DF.apply(lambda x: {k:v for k, v in x["subject_to_index"].items()}, axis=1)

In [None]:
LFP_AND_SLEAP_DF["subject_to_tracks"]

In [None]:
# Map each subject to its slice of the locations array, taking index v along the fourth axis.
# NOTE(review): presumably the fourth axis is the track (animal) axis; confirm against get_sleap_tracks_from_h5.
LFP_AND_SLEAP_DF["subject_to_tracks"] = LFP_AND_SLEAP_DF.apply(lambda x: {k: x["locations"][:,:,:,v] for k, v in x["subject_to_index"].items()}, axis=1)

In [None]:
LFP_AND_SLEAP_DF["subject_to_tracks"]

In [None]:
LFP_AND_SLEAP_DF.head()

## Getting the coordinates of the corners

In [None]:
LFP_AND_SLEAP_DF["sleap_path"].iloc[0]

In [None]:
# Each corner file is in the same folder as the pose tracking file and shares its base name:
# "id_corrected.h5" becomes "corner.h5" and the ".fixed", ".round_1", ".1_subj" and ".2_subj" tags are removed.
# NOTE(review): the replacements run on the whole path string, so a directory name containing
# one of these tags would be altered as well.
LFP_AND_SLEAP_DF["corner_path"] = LFP_AND_SLEAP_DF["sleap_path"].apply(lambda x: x.replace("id_corrected.h5", "corner.h5").replace(".fixed", "").replace(".round_1", "").replace(".1_subj", "").replace(".2_subj", ""))


In [None]:
LFP_AND_SLEAP_DF["corner_path"].iloc[0]

In [None]:
# Getting the node names of each corner file; their order is used below
# to pick each corner's coordinates out of the corner array.
LFP_AND_SLEAP_DF["corner_parts"] = LFP_AND_SLEAP_DF["corner_path"].apply(lambda x: sleap.process_pose.get_node_names_from_sleap(x))

In [None]:
LFP_AND_SLEAP_DF["corner_parts"]

In [None]:
# Getting the coordinates of all the corners, read with the same helper used for the animal tracks.
# At this stage the column holds the raw array; the next cell turns it into a dict keyed by corner name.
LFP_AND_SLEAP_DF["corner_to_coordinate"] = LFP_AND_SLEAP_DF["corner_path"].apply(lambda x: sleap.process_pose.get_sleap_tracks_from_h5(x))

In [None]:
# Parsing out each corner and creating a dictionary of name to coordinates.
# The corner at position `index` in corner_parts is taken from index `index` on the second axis of the array.
# NOTE: this overwrites the array column with dicts, so re-running this cell on its own
# (without re-running the previous cell) would fail.
LFP_AND_SLEAP_DF["corner_to_coordinate"] = LFP_AND_SLEAP_DF.apply(lambda x: {part: x["corner_to_coordinate"][:,index,:,:] for index, part in enumerate(x["corner_parts"])}, axis=1)

In [None]:
LFP_AND_SLEAP_DF["corner_to_coordinate"]

In [None]:
# Filtering out all the NaNs because there's only one labeled frame.
# Boolean masking flattens the array, and only the first two remaining values are kept.
# NOTE(review): assumes those two values are the (x, y) of the single labeled frame; confirm.
LFP_AND_SLEAP_DF["corner_to_coordinate"] = LFP_AND_SLEAP_DF.apply(lambda x: {k: v[~np.isnan(v)][:2] for k, v in x["corner_to_coordinate"].items()}, axis=1)

In [None]:
LFP_AND_SLEAP_DF["corner_to_coordinate"].iloc[0]

# Getting the distances between corners

- Getting the average width and height so that we can convert pixels to cm

In [None]:
# Using the x-coordinates (element 0 of each corner) for the width:
# right corner minus left corner, measured separately along the bottom and the top edge.
# Values are still in pixels here.
LFP_AND_SLEAP_DF["bottom_width"] = LFP_AND_SLEAP_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_right"][0] - x["box_bottom_left"][0])
LFP_AND_SLEAP_DF["top_width"] = LFP_AND_SLEAP_DF["corner_to_coordinate"].apply(lambda x: x["box_top_right"][0] - x["box_top_left"][0])


In [None]:
# Using the y-coordinates (element 1 of each corner) for the height:
# bottom corner minus top corner, measured separately on the right and the left edge.
# NOTE(review): this is positive only if y grows downward (image coordinates); confirm.
LFP_AND_SLEAP_DF["right_height"] = LFP_AND_SLEAP_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_right"][1] - x["box_top_right"][1])
LFP_AND_SLEAP_DF["left_height"] = LFP_AND_SLEAP_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_left"][1] - x["box_top_left"][1])


In [None]:
# Mean of the two opposite edges: the box height is the mean of its right and
# left sides, and the box width is the mean of its bottom and top sides.
LFP_AND_SLEAP_DF["average_height"] = (
    LFP_AND_SLEAP_DF["right_height"] + LFP_AND_SLEAP_DF["left_height"]
) / 2
LFP_AND_SLEAP_DF["average_width"] = (
    LFP_AND_SLEAP_DF["bottom_width"] + LFP_AND_SLEAP_DF["top_width"]
) / 2

- Getting the pixel to cm ratio

In [None]:
# Conversion factor per axis: real box size divided by the box size measured in pixels.
# Multiplying a pixel coordinate by the ratio gives its position in the units of MED_PC_WIDTH / MED_PC_HEIGHT
# (assumed to be cm, per the section heading).
LFP_AND_SLEAP_DF["width_ratio"] = MED_PC_WIDTH / LFP_AND_SLEAP_DF["average_width"]
LFP_AND_SLEAP_DF["height_ratio"] = MED_PC_HEIGHT / LFP_AND_SLEAP_DF["average_height"]

In [None]:
LFP_AND_SLEAP_DF["height_ratio"]

In [None]:
LFP_AND_SLEAP_DF["width_ratio"]

## Converting Pixels to cm

- Converting the X-dimension

In [None]:
LFP_AND_SLEAP_DF["subject_to_tracks"]

In [None]:
LFP_AND_SLEAP_DF

In [None]:
# For every subject in the row: rescale dimension 0 (X) of its track by the row's width ratio,
# then pass the result through fill_missing.
# NOTE(review): fill_missing presumably fills in missing (NaN) coordinates; confirm in sleap.process_pose.
LFP_AND_SLEAP_DF["rescaled_locations"] = LFP_AND_SLEAP_DF.apply(lambda x: {key: sleap.process_pose.fill_missing(sleap.process_pose.rescale_dimension_in_array(value, dimension=0, ratio=x["width_ratio"])) for key, value in x["subject_to_tracks"].items()}, axis=1)

- Converting the Y-dimension

In [None]:
# Second pass over the already X-rescaled tracks: rescale dimension 1 (Y) by the row's height ratio.
# This reads and overwrites "rescaled_locations", so running the cell twice would scale Y twice.
LFP_AND_SLEAP_DF["rescaled_locations"] = LFP_AND_SLEAP_DF.apply(lambda x: {key: sleap.process_pose.rescale_dimension_in_array(value, dimension=1, ratio=x["height_ratio"]) for key, value in x["rescaled_locations"].items()}, axis=1)

In [None]:
LFP_AND_SLEAP_DF["corner_to_coordinate"]

In [None]:
LFP_AND_SLEAP_DF

In [None]:
# Normalize dictionary column: expand each row's corner dict into a table
# with one column per corner name.
normalized = pd.json_normalize(LFP_AND_SLEAP_DF["corner_to_coordinate"])

In [None]:
normalized

In [None]:


# Drop the original column and concat the normalized DataFrame.
# The two frames are aligned on their index, so both must share the same row labels.
# NOTE: not re-runnable on its own; after one run "corner_to_coordinate" no longer exists.
LFP_AND_SLEAP_DF = pd.concat([LFP_AND_SLEAP_DF.drop(["corner_to_coordinate"], axis=1), normalized], axis=1)


In [None]:
LFP_AND_SLEAP_DF.head()

In [None]:
# Convert every corner column from pixels to the rescaled units: x times width ratio, y times height ratio.
# The corner names are taken from the first row only, so all rows are assumed to share the same corner names.
# NOTE: the columns are overwritten in place, so running this cell twice would apply the ratios twice.
for corner in LFP_AND_SLEAP_DF["corner_parts"].iloc[0]:
    LFP_AND_SLEAP_DF[corner] = LFP_AND_SLEAP_DF.apply(lambda x: [x[corner][0]*x["width_ratio"], x[corner][1]*x["height_ratio"]], axis=1)

In [None]:
LFP_AND_SLEAP_DF

## Looking over the tracks

In [None]:
FILE_INDEX = 0

In [None]:
LFP_AND_SLEAP_DF["sleap_path"].iloc[FILE_INDEX]

In [None]:
LFP_AND_SLEAP_DF["rescaled_locations"]

In [None]:
LFP_AND_SLEAP_DF["subject"]

In [None]:
# Read only what is needed from the HDF5 file: the dataset names and the node names.
with h5py.File(LFP_AND_SLEAP_DF["sleap_path"].iloc[FILE_INDEX], "r") as sleap_file:
    dset_names = list(sleap_file.keys())
    node_names = [raw_name.decode() for raw_name in sleap_file["node_names"][:]]

# The rescaled track of the selected row's subject comes from the table, not from the file.
current_subject = LFP_AND_SLEAP_DF["subject"].iloc[FILE_INDEX]
locations = LFP_AND_SLEAP_DF["rescaled_locations"].iloc[FILE_INDEX][current_subject]

print("===HDF5 datasets===")
print(dset_names)
print()

print("===locations data shape===")
print(locations.shape)
print()

print("===nodes===")
for node_index, node_name in enumerate(node_names):
    print(f"{node_index}: {node_name}")
print()

In [None]:
thorax_loc = locations[:, THORAX_INDEX, :]

In [None]:
# Thorax position over time, one line per coordinate, drawn on an explicit Axes.
fig, ax = plt.subplots()

ax.plot(thorax_loc[:, 0], label="X-coordinates")
# Y is negated so the two traces do not sit on top of each other.
ax.plot(-1 * thorax_loc[:, 1], label="Y-coordinates")

ax.legend(loc="center right")
ax.set_title("Thorax locations")
ax.set_xlabel("Time in frames")
ax.set_ylabel("Coordinate Position")

In [None]:
# Path of the thorax in the X-Y plane, on a square figure.
track_ax = plt.figure(figsize=(7, 7)).gca()
track_ax.plot(thorax_loc[:, 0], thorax_loc[:, 1])

track_ax.set_title("Thorax tracks")
track_ax.set_xlabel("X-Coordinates")
track_ax.set_ylabel("Y-Coordinates")


## Creating an individual column for each pose tracking

In [None]:
# Unconditionally raises, so "Run All" stops here and none of the later cells (including the export) execute.
# NOTE(review): presumably a manual stop marker; confirm whether it should stay in the notebook.
raise ValueError()

In [None]:
# The agent is a subject in all_subjects other than the row's own subject.
# (The commented-out version built set(x["subject"]), i.e. a set of the string's characters.)
# LFP_AND_SLEAP_DF["agent"] = LFP_AND_SLEAP_DF.apply(lambda x: list(set(x["all_subjects"]) - set(x["subject"]))[0], axis=1)

# Raises IndexError when all_subjects contains nobody besides the subject; with more than
# one other subject the pick is arbitrary because set order is not defined.
LFP_AND_SLEAP_DF["agent"] = LFP_AND_SLEAP_DF.apply(lambda x: list((set(x["all_subjects"]) - set([x["subject"]])))[0], axis=1)

In [None]:
LFP_AND_SLEAP_DF

In [None]:
# Rescaled track of the row's own subject. Raises KeyError if that subject has no entry
# in rescaled_locations (i.e. it was not found among the SLEAP track names).
LFP_AND_SLEAP_DF["subject_locations"] = LFP_AND_SLEAP_DF.apply(lambda x: x["rescaled_locations"][x["subject"]] , axis=1)

In [None]:
# Rescaled track of the other animal (the agent). Raises KeyError if the agent has no entry
# in rescaled_locations (i.e. it was not found among the SLEAP track names).
LFP_AND_SLEAP_DF["agent_locations"] = LFP_AND_SLEAP_DF.apply(lambda x: x["rescaled_locations"][x["agent"]] , axis=1)

## Removing unnecessary columns

In [None]:
# Remove the intermediate columns created while building the tracks.
# errors="ignore" means names that are already gone (or misspelled) are skipped without any warning.
# Note that "current_subject" is dropped here as well.
LFP_AND_SLEAP_DF = LFP_AND_SLEAP_DF.drop(["sleap_glob", "subject_to_index", "subject_to_tracks", "corner_parts", "corner_to_coordinate", "bottom_width", "top_width", "right_height", "left_height", "average_height", "average_width", "width_ratio", "height_ratio", 'locations', 'current_subject', 'track_names', 'sleap_path', 'corner_path', 'all_sleap_data', 'rescaled_locations'], errors="ignore", axis=1)

In [None]:
LFP_AND_SLEAP_DF.columns

In [None]:
LFP_AND_SLEAP_DF.head()

# Adding the start/stop frame information

In [None]:
# NOTE(review): no variable named `subject_locations` is assigned anywhere in the visible notebook
# (only a DataFrame column of that name exists), so on a fresh kernel this raises NameError.
# The cells below treat it as a table with "tracked_subject", "file_path", "start_frame" and "stop_frame" columns.
subject_locations.head()

- Getting relevant metadata for each video

In [None]:
# Getting all the rows that have two subjects:
# split "tracked_subject" on "_" and keep the rows where the split yields exactly two IDs.
# NOTE(review): `subject_locations` is not assigned in the visible notebook; this relies on leftover kernel state.
subject_locations["tracked_subject"] = subject_locations["tracked_subject"].apply(lambda x: str(x).split("_"))
subject_locations = subject_locations[subject_locations["tracked_subject"].apply(lambda x: len(x) == 2)]

In [None]:
# Getting the sleap filename from file path (file name without its directory).
# NOTE(review): requires a "file_path" column on `subject_locations`, which is not assigned in the visible notebook.
subject_locations["sleap_filename"] = subject_locations["file_path"].apply(lambda x: os.path.basename(x))

In [None]:
# Getting the sleap fileroot from the sleap filename:
# the first two dot-separated pieces of the file name, re-joined with a dot.
subject_locations["sleap_fileroot"] = subject_locations["sleap_filename"].apply(lambda x: ".".join(x.split(".")[0:2]))

In [None]:
# Combining the start and stop frame columns into a single (start, stop) tuple of ints,
# then removing the two separate columns.
subject_locations["start_stop_frame"] = subject_locations.apply(lambda x: (int(x["start_frame"]), int(x["stop_frame"])), axis=1)
subject_locations = subject_locations.drop(columns=["start_frame", "stop_frame"], errors="ignore")

- Merging the dataframes based on shared SLEAP file basename

In [None]:
# Left merge: every row of LFP_AND_SLEAP_DF is kept; rows without a matching "sleap_fileroot"
# get NaN in the columns coming from subject_locations.
# NOTE(review): LFP_AND_SLEAP_DF already carries "tracked_subject" (and start/stop frame columns) from the
# earlier merge, so overlapping column names will receive _x/_y suffixes; check the resulting column names.
LFP_AND_SLEAP_DF = pd.merge(left=LFP_AND_SLEAP_DF, right=subject_locations, left_on="video_name", right_on="sleap_fileroot", how="left")

- Converting the start/stop frames into timestamps

In [None]:
# Look up the timestamps of the start and stop frames, giving a (start, stop) tuple per row.
# The frame numbers are used directly as positions in video_timestamps, so a stop frame
# equal to or beyond the array length raises IndexError.
# NOTE(review): confirm whether the spreadsheet's frame numbers are 0-based or 1-based,
# and that no row has a missing start_stop_frame after the left merge above.
LFP_AND_SLEAP_DF["start_stop_timestamps"] = LFP_AND_SLEAP_DF.apply(lambda x: (x["video_timestamps"][x["start_stop_frame"][0]], x["video_timestamps"][x["start_stop_frame"][1]]), axis=1)

## Exporting

In [None]:
FULL_LFP_TRACES_PKL

In [None]:
LFP_AND_SLEAP_DF.columns

In [None]:
# Write the merged table (unfiltered traces and tracks) to OUTPUT_DIR.
# NOTE(review): a later cell writes the filtered table to this same path, replacing this file.
LFP_AND_SLEAP_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

In [None]:
raise ValueError()

## Going from frame information to ephys

# Filtering for parts of the video

In [None]:
def _filter_lfp_by_start_stop(row, value_column):
    """Run ``filter_by_timestamp_range`` for one row, using its start/stop timestamps as the range.

    ``row["lfp_timestamps"]`` is passed as the timestamps and ``row[value_column]`` as the values.
    The helper's result is returned untouched; callers pick element 0 or 1 from it.
    """
    window_start = row["start_stop_timestamps"][0]
    window_stop = row["start_stop_timestamps"][1]
    return utilities.helper.filter_by_timestamp_range(
        window_start, window_stop, row["lfp_timestamps"], row[value_column]
    )


# The column list is built once up front, so the columns added inside the loop are not revisited.
lfp_trace_columns = [col for col in LFP_AND_SLEAP_DF.columns if "lfp_trace" in col]
for trace_col in lfp_trace_columns:
    print(trace_col)
    # The brain region is the part of the column name before the first underscore.
    brain_region = trace_col.split("_")[0]
    LFP_AND_SLEAP_DF["filtered_{}_trace".format(brain_region)] = LFP_AND_SLEAP_DF.apply(
        lambda row: _filter_lfp_by_start_stop(row, trace_col)[1], axis=1
    )

# The timestamps themselves go through the same filter; element 0 of the result is kept.
LFP_AND_SLEAP_DF["filtered_lfp_timestamps"] = LFP_AND_SLEAP_DF.apply(
    lambda row: _filter_lfp_by_start_stop(row, "lfp_timestamps")[0], axis=1
)

In [None]:
# The unfiltered traces and their timestamps are no longer needed once the
# filtered versions exist; missing names are skipped silently.
raw_lfp_columns = [col for col in LFP_AND_SLEAP_DF.columns if "lfp_trace" in col]
LFP_AND_SLEAP_DF = LFP_AND_SLEAP_DF.drop(
    columns=raw_lfp_columns + ["lfp_timestamps"],
    errors="ignore",
)

In [None]:
LFP_AND_SLEAP_DF.head()

In [None]:
# Inspect which columns remain after the raw LFP traces were dropped.
LFP_AND_SLEAP_DF.columns

In [None]:
LFP_AND_SLEAP_DF["video_timestamps"].iloc[0]

In [None]:
LFP_AND_SLEAP_DF["video_timestamps"].apply(lambda x: x.shape)

In [None]:
LFP_AND_SLEAP_DF["subject_locations"].apply(lambda x: x.shape)

In [None]:
LFP_AND_SLEAP_DF["recording"].iloc[5]

In [None]:
# output column -> (column passed as values, which element of the helper's result to keep)
video_filter_targets = {
    "filtered_subject_locations": ("subject_locations", 1),
    "filtered_agent_locations": ("agent_locations", 1),
    "filtered_video_timestamps": ("video_timestamps", 0),
}

# Every target is filtered to the row's start/stop timestamp range, always using
# the video timestamps as the time base.
for output_column, (value_column, result_position) in video_filter_targets.items():
    LFP_AND_SLEAP_DF[output_column] = LFP_AND_SLEAP_DF.apply(
        lambda row: utilities.helper.filter_by_timestamp_range(
            row["start_stop_timestamps"][0],
            row["start_stop_timestamps"][1],
            row["video_timestamps"],
            row[value_column],
        )[result_position],
        axis=1,
    )

In [None]:
LFP_AND_SLEAP_DF = LFP_AND_SLEAP_DF.drop(columns=["video_timestamps", "subject_locations", "agent_locations"], errors="ignore")

In [None]:
# Sorting column names for easier reading.
# The sort key is the text after the last underscore, so columns sharing a suffix
# (e.g. "..._timestamps", "..._trace") end up next to each other.
sorted_columns = sorted(LFP_AND_SLEAP_DF.columns, key=lambda x: x.split("_")[-1])

In [None]:
LFP_AND_SLEAP_DF = LFP_AND_SLEAP_DF[sorted_columns].copy()

In [None]:
LFP_AND_SLEAP_DF.columns

In [None]:
# Write the filtered, column-sorted table to OUTPUT_DIR.
# NOTE(review): this is the same path as the earlier export, so that file is replaced by this one.
LFP_AND_SLEAP_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

In [None]:
LFP_AND_SLEAP_DF

In [None]:
raise ValueError()

In [None]:
def find_nearest_timestamp_indices(timestamps, other_timestamps, start_index=0, stop_index=1):
    """
    Map a start/stop position in one data stream onto the closest positions in another stream.

    The timestamps found at ``start_index`` and ``stop_index`` of the first stream are each
    looked up in the second stream with ``utilities.helper.find_nearest_index``.

    Parameters:
    - timestamps (sequence of int or float): Timestamps of the first data stream.
    - other_timestamps (sequence of int or float): Timestamps of the other data stream.
    - start_index (int, optional): Start position in the first data stream. Defaults to 0.
    - stop_index (int, optional): Stop position in the first data stream. Defaults to 1.

    Returns:
    - tuple: ``(nearest_start_index, nearest_stop_index)``, positions in ``other_timestamps``.
    """
    # Timestamps marking the two ends of the range in the first stream (start first, then stop).
    boundary_timestamps = (timestamps[start_index], timestamps[stop_index])

    # Look each boundary up in the other stream, preserving the start/stop order.
    return tuple(
        utilities.helper.find_nearest_index(other_timestamps, boundary)
        for boundary in boundary_timestamps
    )

# OLD CODE BELOW

In [32]:
raise ValueError()

ValueError: 

- Getting the names of each subject

In [None]:
LFP_AND_SLEAP_DF["video_name"].unique()

In [None]:
glob.glob(SLEAP_DIR+ "/*/*id_corrected*.h5")[:10]

In [None]:
SLEAP_DIR

In [None]:
START_STOP_FRAME_DF

In [None]:
# Old approach: look the SLEAP file up by video name instead of by SLEAP file name.
# Matches "<video_name>*id_corrected*.h5" exactly one directory below SLEAP_DIR and stores the list of hits.
# LFP_AND_SLEAP_DF["video_path"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"].apply(lambda x: os.path.join(SLEAP_DIR, "*", x + "*.h5"))
VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_glob"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"].apply(lambda x: glob.glob(os.path.join(SLEAP_DIR, "*", x + "*id_corrected*.h5")))
# VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_glob"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"].apply(lambda x: os.path.join(SLEAP_DIR, "*", x + "*2_subj*.h5"))


In [None]:
# Keep only the recordings for which at least one SLEAP file was found, then renumber the rows from zero.
# NOTE: this overwrites the input table, so the unfiltered rows are gone unless the pickle is reloaded.
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF[VIDEO_TO_FRAME_AND_SUBJECT_DF['sleap_glob'].apply(lambda x: len(x) >= 1)]
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF.reset_index(drop=True)




In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_glob"].iloc[0]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.columns

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF