# Spike Train Calculation

Brief 1-2 sentence description of notebook.

In [1]:
import os
import glob
import git
import sys


In [2]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
# import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import h5py
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter


In [3]:
git_repo = git.Repo(".", search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")

In [4]:
git_root

'/nancy/projects/reward_competition_extention'

In [5]:
sys.path.insert(0, os.path.join(git_root, 'src'))

In [6]:
import utilities.helper

In [7]:
# sns.set('notebook', 'ticks', font_scale=1.2)
mpl.rcParams['figure.figsize'] = [15,6]

## Functions

In [41]:
def calculate_rolling_avg_firing_rate(firing_times, window_size=2000, slide=2000, stop_time=None):
    """
    Calculates the rolling average firing rate of a neuron.

    Parameters:
    - firing_times (numpy.ndarray): An array of firing times of a neuron.
    - window_size (int or float): The size of the window for calculating the average firing rate.
    - slide (int or float): The amount to slide the window for each calculation.
    - stop_time (int or float, optional): The timestamp to stop the calculation at. If None, the calculation goes until the end of the firing times.

    Returns:
    - tuple: Two numpy.ndarrays, the rolling average firing rates and the starting timestamps of each window.
    """
    # Initialize lists to store the results
    avg_firing_rates = []
    window_starts = []

    # If no stop_time is provided, use the last firing time
    if stop_time is None:
        stop_time = firing_times[-1]

    # Calculate the number of windows
    num_windows = int((stop_time - window_size) / slide) + 1

    # Loop over each window
    for i in range(num_windows):
        # Calculate the start and end of the window
        start = i * slide
        end = start + window_size

        # Calculate the average firing rate for this window
        firing_rate = np.sum((firing_times >= start) & (firing_times < end)) / window_size

        # Store the results
        avg_firing_rates.append(firing_rate)
        window_starts.append(start)

    return np.array(avg_firing_rates), np.array(window_starts)

## Inputs & Data

Explanation of each input and where it comes from.

In [8]:
# Inputs and Required data loading
# input varaible names are in all caps snake case
# Whenever an input changes or is used for processing 
# the vairables are all lower in snake case

VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.read_pickle(os.path.join(git_root, "notebooks/export/proc/rce_pilot_2_full_lfp_traces_and_sleap_poses.pkl"))

ALL_PHY_DIR = glob.glob("/scratch/back_up/reward_competition_extention/final_proc/phy_curation/*")

OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs

SAMPLING_RATE = 20000

In [9]:
VIDEO_TO_FRAME_AND_SUBJECT_DF

Unnamed: 0,agent,cohort,session_dir,sleap_filename,sleap_fileroot,start_stop_frame,tone_frames,port_entry_frames,box_top_left,box_bottom_left,...,tone_timestamps,port_entry_timestamps,start_stop_timestamps,filtered_lfp_timestamps,filtered_video_timestamps,filtered_mpfc_trace,filtered_vhpc_trace,filtered_bla_trace,filtered_lh_trace,filtered_md_trace
0,1.2,2,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,"(1, 39500)","[[1029, 1229], [3425, 3624], [5721, 5920], [75...","[[102, 105], [110, 148], [193, 199], [200, 202...","[29.623370221193532, 10.85380707317621]","[30.23007505102494, 34.598706975722735]",...,"[[1030879, 1230878], [3430878, 3630878], [5730...","[[102478, 104881], [110481, 147879], [193083, ...","(1384, 39574889)","[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 1500, 2770, 4156, 5542, 5542, 6927, 831...","[0.015505511, 0.62215865, 1.1086441, 1.3373504...","[-1.3505517, -0.8568693, -0.33960325, 0.141501...","[-1.7142574, -1.1298128, -0.101937994, 0.48760...","[-2.3216858, -1.907385, -1.5471234, -1.4290377...","[-2.3028033, -2.0405018, -1.7475417, -1.429032..."
1,1.1,2,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,"(1, 39500)","[[1029, 1229], [3425, 3624], [5721, 5920], [75...","[[102, 105], [110, 148], [193, 199], [200, 202...","[29.623370221193532, 10.85380707317621]","[30.23007505102494, 34.598706975722735]",...,"[[1030879, 1230878], [3430878, 3630878], [5730...","[[102478, 104881], [110481, 147879], [193083, ...","(1384, 39574889)","[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 1500, 2770, 4156, 5542, 5542, 6927, 831...","[0.6574679, 0.11276958, -0.3659692, -0.3723524...","[0.7847764, 0.6408233, 0.48410022, 0.30299798,...","[0.5454766, 0.5454766, 0.5228427, 0.518316, 0....","[0.19785033, -0.25780496, -0.2787891, 0.119909...","[-0.62384546, -0.897785, -1.1164762, -0.982959..."
2,1.4,2,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,"(1, 32316)","[[1098, 1297], [3492, 3693], [5788, 5988], [75...","[[788, 835], [838, 839], [839, 871], [871, 900...","[28.94954695212123, 11.559275766924861]","[29.794023445649177, 35.275466614423074]",...,"[[1099250, 1299250], [3499256, 3699252], [5799...","[[788852, 836852], [838652, 840050], [840652, ...","(1384, 32376683)","[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 2770, 2770, 4155, 5541, 6927, 6927, 831...","[0.31349525, 0.40279388, 0.41229373, 0.3039953...","[0.35449925, 0.8595823, 1.0148718, 0.86899376,...","[0.6661627, 0.8376996, 0.9243008, 1.0025749, 0...","[0.6916378, 0.96981716, 0.9088464, 0.7583246, ...","[0.3162679, 0.49376523, 0.4066302, 0.13715701,..."
3,1.1,2,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,"(1, 32316)","[[1098, 1297], [3492, 3693], [5788, 5988], [75...","[[788, 835], [838, 839], [839, 871], [871, 900...","[28.94954695212123, 11.559275766924861]","[29.794023445649177, 35.275466614423074]",...,"[[1099250, 1299250], [3499256, 3699252], [5799...","[[788852, 836852], [838652, 840050], [840652, ...","(1384, 32376683)","[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 2770, 2770, 4155, 5541, 6927, 6927, 831...","[1.8754106, 0.06580388, -1.1351169, 0.23031357...","[2.2451546, 1.6649991, 1.3395461, 1.8442342, 1...","[2.131642, 1.495331, 1.0403687, 1.6066855, 1.8...","[0.6786791, -0.12149195, -0.31839266, 0.494346...","[0.7159058, -0.05324919, -0.6389903, 0.0, 0.44..."
4,1.2,2,20230620_114347_standard_comp_to_omission_D4_s...,20230620_114347_standard_comp_to_omission_D4_s...,20230620_114347_standard_comp_to_omission_D4_s...,"(2027, 32239)","[[1058, 1265], [3460, 3659], [5756, 5955], [75...","[[6, 29], [32, 40], [168, 183], [197, 218], [3...","[28.726306016601946, 10.842459364362139]","[30.091010784999508, 34.98257973535351]",...,"[[1057255, 1257260], [3457285, 3657287], [5757...","[[6242, 30447], [32245, 40242], [169046, 18424...","(2021982, 32290759)","[2022000, 2022020, 2022040, 2022060, 2022080, ...","[2021982, 2023278, 2023368, 2024753, 2026139, ...","[0.96733546, 0.90309834, 0.82374656, 0.7462842...","[-0.43027794, -0.39102963, -0.3735859, -0.4099...","[1.2360266, 1.2037544, 1.0714383, 0.677717, 0....","[0.49828878, 0.45471218, 0.39976782, 0.2576702...","[0.5827334, 0.48292822, 0.37990353, 0.31873262..."
5,1.1,2,20230620_114347_standard_comp_to_omission_D4_s...,20230620_114347_standard_comp_to_omission_D4_s...,20230620_114347_standard_comp_to_omission_D4_s...,"(2027, 32239)","[[1058, 1265], [3460, 3659], [5756, 5955], [75...","[[6, 29], [32, 40], [168, 183], [197, 218], [3...","[28.726306016601946, 10.842459364362139]","[30.091010784999508, 34.98257973535351]",...,"[[1057255, 1257260], [3457285, 3657287], [5757...","[[6242, 30447], [32245, 40242], [169046, 18424...","(2021982, 32290759)","[2022000, 2022020, 2022040, 2022060, 2022080, ...","[2021982, 2023278, 2023368, 2024753, 2026139, ...","[-0.5461191, -0.6570836, -0.6788413, -0.948637...","[0.70498204, 0.87817836, 1.0172232, 0.93916297...","[-1.1168182, -0.9848528, -1.0435041, -1.314766...","[-0.9343849, -0.7208996, -0.73327553, -1.08289...","[-1.0043497, -0.92557716, -1.0092729, -1.51144..."
6,1.2,2,20230621_111240_standard_comp_to_omission_D5_s...,20230621_111240_standard_comp_to_omission_D5_s...,20230621_111240_standard_comp_to_omission_D5_s...,"(1, 33500)","[[477, 1286], [2484, 2683], [4880, 5079], [717...","[[223, 250], [252, 254], [257, 257], [258, 297...","[29.368871804533384, 11.233148401733333]","[30.292670448640752, 35.366774292703255]",...,"[[478042, 1287653], [2488068, 2688073], [48880...","[[223038, 250239], [251039, 253641], [256439, ...","(1384, 33562987)","[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 1384, 2770, 4156, 5541, 5541, 6927, 831...","[-0.31740695, -0.28688705, -0.49137035, 0.1098...","[-1.1510314, -1.6666739, -1.9574865, -1.901279...","[-0.77811754, -1.3635237, -1.4598794, -1.17626...","[0.01645097, -0.32243901, -0.60868585, -0.4737...","[-0.19407488, -0.27361378, -0.39769444, -0.330..."


## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

In [10]:
# Inputs and Required data loading
# input varaible names are in all caps snake case
# Whenever an input changes or is used for processing 
# the vairables are all lower in snake case
OUTPUT_DIR = r"./proc/" # where data is saved should always be shown in the inputs
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_PREFIX = "rce_pilot_2"

In [11]:
FULL_LFP_TRACES_PKL = "{}_lfp_traces_sleap_spikes.pkl".format(OUTPUT_PREFIX)

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

# Reading in Phy

- Reading in a spreadsheet of all the unit classifications
    - They are divided up into good units, multi-units, and noise

In [12]:
ALL_PHY_DIR

['/scratch/back_up/reward_competition_extention/final_proc/phy_curation/20230618_100636_standard_comp_to_omission_D2_subj_1_4_t4b3L_box1_merged.rec',
 '/scratch/back_up/reward_competition_extention/final_proc/phy_curation/20230618_100636_standard_comp_to_omission_D2_subj_1_1_t1b2L_box2_merged.rec',
 '/scratch/back_up/reward_competition_extention/final_proc/phy_curation/20230621_111240_standard_comp_to_omission_D5_subj_1-4_t3b3L_box1_merged.rec',
 '/scratch/back_up/reward_competition_extention/final_proc/phy_curation/20230620_114347_standard_comp_to_omission_D4_subj_1-1_t1b2L_box_2_merged.rec',
 '/scratch/back_up/reward_competition_extention/final_proc/phy_curation/20230617_115521_standard_comp_to_omission_D1_subj_1-2_t2b2L_box2_merged.rec',
 '/scratch/back_up/reward_competition_extention/final_proc/phy_curation/20230617_115521_standard_comp_to_omission_D1_subj_1-1_t1b3L_box1_merged.rec',
 '/scratch/back_up/reward_competition_extention/final_proc/phy_curation/20230620_114347_standard_co

In [13]:
recording_to_cluster_info = {}
for recording_dir in ALL_PHY_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "cluster_info.tsv")
        recording_to_cluster_info[recording_basename] = pd.read_csv(file_path, sep="\t")
    except Exception as e:
        print(e)

In [14]:
recording_to_cluster_info[list(recording_to_cluster_info.keys())[1]].head()

Unnamed: 0,cluster_id,1_5_ms_isi,1ms_isi,2ms_isi,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id
0,0,618,174,1492,10.375586,0,0,0.0,17.72784,mua,58150,0,1
1,1,0,0,1,24.589169,0,0,0.0,0.894471,mua,2934,0,2
2,2,74,12,240,8.472431,1,0,20.0,10.257148,mua,33645,0,3
3,3,0,0,0,31.092701,1,0,20.0,0.69509,noise,2280,0,4
4,4,0,0,0,31.629177,1,0,20.0,0.165236,noise,542,0,5


- Combining all the unit info dataframes and adding the recording name

In [15]:
recording_to_cluster_info_df = pd.concat(recording_to_cluster_info, names=['recording_name']).reset_index(level=1, drop=True).reset_index()


In [16]:
recording_to_cluster_info_df.head()

Unnamed: 0,recording_name,cluster_id,1_5_ms_isi,1ms_isi,2ms_isi,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id
0,20230618_100636_standard_comp_to_omission_D2_s...,8,0,0,1,9.198713,3,0.0,60.0,0.217516,mua,746,0,9.0
1,20230618_100636_standard_comp_to_omission_D2_s...,9,1,1,2,12.508228,3,0.0,60.0,1.605419,good,5506,0,10.0
2,20230618_100636_standard_comp_to_omission_D2_s...,19,1,0,2,13.43512,15,0.0,300.0,0.503261,good,1726,0,20.0
3,20230618_100636_standard_comp_to_omission_D2_s...,20,0,0,0,7.34239,20,0.0,400.0,0.219265,mua,752,0,22.0
4,20230618_100636_standard_comp_to_omission_D2_s...,21,47,10,122,10.664793,21,0.0,420.0,0.605604,noise,2077,0,23.0


- Filtering for the good units

In [17]:
good_unit_cluster_info_df = recording_to_cluster_info_df[recording_to_cluster_info_df["group"] == "good"].reset_index(drop=True)

In [18]:
good_unit_cluster_info_df.head()

Unnamed: 0,recording_name,cluster_id,1_5_ms_isi,1ms_isi,2ms_isi,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id
0,20230618_100636_standard_comp_to_omission_D2_s...,9,1,1,2,12.508228,3,0.0,60.0,1.605419,good,5506,0,10.0
1,20230618_100636_standard_comp_to_omission_D2_s...,19,1,0,2,13.43512,15,0.0,300.0,0.503261,good,1726,0,20.0
2,20230618_100636_standard_comp_to_omission_D2_s...,65,2,1,3,8.4668,2,0.0,40.0,2.628268,good,9014,0,8.0
3,20230618_100636_standard_comp_to_omission_D2_s...,83,9,3,26,13.022879,2,0.0,40.0,10.683353,good,36640,0,5.0
4,20230618_100636_standard_comp_to_omission_D2_s...,87,3,0,4,15.160955,0,0.0,0.0,2.416584,good,8288,0,1.0


In [19]:
recording_to_good_unit_ids = good_unit_cluster_info_df.groupby('recording_name')['cluster_id'].apply(list).to_dict()

- A list of all the unit IDs that each spike came from in order
    - First item is first spike, second item is second spike, etc.

In [20]:
recording_to_spike_clusters = {}
for recording_dir in ALL_PHY_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "spike_clusters.npy")
        recording_to_spike_clusters[recording_basename] = np.load(file_path)
    except Exception as e:
        print(e)

In [21]:
recording_to_spike_clusters[list(recording_to_spike_clusters.keys())[0]]

array([82, 82, 82, ..., 87, 83, 99], dtype=int32)

In [22]:
recording_to_spike_clusters[list(recording_to_spike_clusters.keys())[0]].shape

(251953,)

- The times that all the spikes happened

In [23]:
recording_to_spike_times = {}
for recording_dir in ALL_PHY_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "spike_times.npy")
        recording_to_spike_times[recording_basename] = np.load(file_path)
    except Exception as e:
        print(e)

In [24]:
recording_to_spike_times[list(recording_to_spike_times.keys())[0]]

array([[      46],
       [     266],
       [     437],
       ...,
       [67098020],
       [67098467],
       [67098849]])

In [25]:
recording_to_spike_times[list(recording_to_spike_times.keys())[0]].shape

(251953, 1)

### Combining everything into a dataframe

In [26]:
recording_to_spike_df = {}
for recording_dir in ALL_PHY_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        cluster_info_path = os.path.join(recording_dir, "phy", "cluster_info.tsv")
        cluster_info_df = pd.read_csv(cluster_info_path, sep="\t")

        spike_clusters_path = os.path.join(recording_dir, "phy", "spike_clusters.npy")
        spike_clusters = np.load(spike_clusters_path)
        
        spike_times_path = os.path.join(recording_dir, "phy", "spike_times.npy")
        spike_times = np.load(spike_times_path)

        spike_df = pd.DataFrame({'spike_clusters': spike_clusters, 'spike_times': spike_times.T[0]})

        merged_df = spike_df.merge(cluster_info_df, left_on='spike_clusters', right_on='cluster_id', how="left")
        merged_df["recording_name"] = recording_basename

        merged_df["timestamp_isi"] = merged_df.groupby('spike_clusters')["spike_times"].diff()
        merged_df["current_isi"] = merged_df["timestamp_isi"] / SAMPLING_RATE
        
        if not merged_df.empty:
            recording_to_spike_df[recording_basename] = merged_df
       
    except Exception as e:
        print(e)

In [27]:
cluster_info_df.head()

Unnamed: 0,cluster_id,1_5_ms_isi,1ms_isi,2ms_isi,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id
0,11,1,0,5,8.9236,3,0,60.0,2.805744,good,5778,0,13
1,12,0,0,0,9.733616,3,0,60.0,2.012289,noise,4144,0,14
2,44,3,1,29,9.739503,17,0,340.0,13.732511,good,28280,0,46
3,45,38,12,88,9.464285,18,0,360.0,9.79534,mua,20172,0,47
4,46,32,5,125,10.028766,18,0,360.0,6.568588,mua,13527,0,48


In [28]:
spike_times

array([[     143],
       [     168],
       [     274],
       ...,
       [38957896],
       [38957942],
       [38957990]])

In [29]:
spike_clusters

array([252, 246, 279, ...,  62,  60,  62], dtype=int32)

- Combining the spike time df for all recordings

In [30]:
all_spike_time_df = pd.concat(recording_to_spike_df.values())

In [31]:
all_spike_time_df = all_spike_time_df[all_spike_time_df["group"] == "good"].reset_index(drop=True)

In [32]:
all_spike_time_df.head()

Unnamed: 0,spike_clusters,spike_times,cluster_id,1_5_ms_isi,1ms_isi,2ms_isi,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id,recording_name,timestamp_isi,current_isi
0,65,937,65,2,1,3,8.4668,2,0.0,40.0,2.628268,good,9014,0,8.0,20230618_100636_standard_comp_to_omission_D2_s...,,
1,123,998,123,1,1,1,19.984566,24,0.0,480.0,2.384219,good,8177,0,28.0,20230618_100636_standard_comp_to_omission_D2_s...,,
2,103,1125,103,12,2,32,14.402934,6,0.0,120.0,8.755451,good,30028,0,11.0,20230618_100636_standard_comp_to_omission_D2_s...,,
3,123,1327,123,1,1,1,19.984566,24,0.0,480.0,2.384219,good,8177,0,28.0,20230618_100636_standard_comp_to_omission_D2_s...,329.0,0.01645
4,103,1346,103,12,2,32,14.402934,6,0.0,120.0,8.755451,good,30028,0,11.0,20230618_100636_standard_comp_to_omission_D2_s...,221.0,0.01105


In [33]:
all_spike_time_df.tail()

Unnamed: 0,spike_clusters,spike_times,cluster_id,1_5_ms_isi,1ms_isi,2ms_isi,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id,recording_name,timestamp_isi,current_isi
2017369,255,38957244,255,0,0,0,14.943707,6,0.0,120.0,0.691967,good,1425,0,18.0,20230619_115321_standard_comp_to_omission_D3_s...,173.0,0.00865
2017370,255,38957331,255,0,0,0,14.943707,6,0.0,120.0,0.691967,good,1425,0,18.0,20230619_115321_standard_comp_to_omission_D3_s...,87.0,0.00435
2017371,262,38957511,262,1,1,1,11.366614,6,0.0,120.0,8.020505,good,16517,0,17.0,20230619_115321_standard_comp_to_omission_D3_s...,3072.0,0.1536
2017372,44,38957537,44,3,1,29,9.739503,17,0.0,340.0,13.732511,good,28280,0,46.0,20230619_115321_standard_comp_to_omission_D3_s...,2342.0,0.1171
2017373,134,38957868,134,5,4,17,8.562255,2,0.0,40.0,14.537135,good,29937,0,11.0,20230619_115321_standard_comp_to_omission_D3_s...,1151.0,0.05755


## Grouping all the neurons by recording

In [34]:
# Grouping all spike times by neuron and recording

grouped_df = all_spike_time_df.groupby(['spike_clusters', 'recording_name'])["spike_times"].apply(lambda x: sorted(list(x))).reset_index()
grouped_df = grouped_df.sort_values(by=['recording_name', 'spike_clusters']).reset_index(drop=True)
grouped_df = grouped_df.groupby('recording_name').agg({'spike_clusters': lambda x: list(x), 'spike_times': lambda x: list(x)}).reset_index()

In [35]:
grouped_df

Unnamed: 0,recording_name,spike_clusters,spike_times
0,20230617_115521_standard_comp_to_omission_D1_s...,"[3, 4, 5, 6, 17, 28, 29, 39, 48, 52, 53, 54, 5...","[[673223, 2367433, 2569212, 3969647, 3973243, ..."
1,20230617_115521_standard_comp_to_omission_D1_s...,"[16, 17, 20, 25, 26, 34, 35, 39, 40, 42, 43, 4...","[[958, 5233, 6331, 7475, 8599, 10550, 23459, 2..."
2,20230618_100636_standard_comp_to_omission_D2_s...,"[7, 11, 12, 48, 50, 51, 61, 64, 76, 84, 90, 98...","[[506, 1419, 2358, 5104, 6785, 7620, 9801, 120..."
3,20230618_100636_standard_comp_to_omission_D2_s...,"[9, 19, 65, 83, 87, 93, 99, 103, 105, 106, 118...","[[458431, 506645, 509846, 545481, 571000, 6011..."
4,20230619_115321_standard_comp_to_omission_D3_s...,"[11, 44, 132, 134, 186, 187, 227, 250, 252, 25...","[[17747, 20655, 24011, 34755, 67305, 110220, 1..."
5,20230620_114347_standard_comp_to_omission_D4_s...,"[2, 22, 64, 89, 98, 100, 119, 121, 122, 126]","[[264055, 325873, 327931, 330996, 334463, 3590..."
6,20230620_114347_standard_comp_to_omission_D4_s...,"[14, 28, 41, 42, 43, 45, 48, 53, 58, 68, 124, ...","[[5884, 7172, 11405, 19311, 19371, 23729, 2627..."
7,20230621_111240_standard_comp_to_omission_D5_s...,"[6, 7, 13, 15, 16, 17, 22, 24, 26, 36, 37, 46,...","[[1891, 5872, 8366, 16312, 17233, 24302, 24337..."


In [38]:
VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.merge(VIDEO_TO_FRAME_AND_SUBJECT_DF, grouped_df, left_on='recording', right_on="recording_name", how='inner')

In [42]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.head()

Unnamed: 0,agent,cohort,session_dir,sleap_filename,sleap_fileroot,start_stop_frame,tone_frames,port_entry_frames,box_top_left,box_bottom_left,...,filtered_lfp_timestamps,filtered_video_timestamps,filtered_mpfc_trace,filtered_vhpc_trace,filtered_bla_trace,filtered_lh_trace,filtered_md_trace,recording_name,spike_clusters,spike_times
0,1.2,2,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,"(1, 39500)","[[1029, 1229], [3425, 3624], [5721, 5920], [75...","[[102, 105], [110, 148], [193, 199], [200, 202...","[29.623370221193532, 10.85380707317621]","[30.23007505102494, 34.598706975722735]",...,"[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 1500, 2770, 4156, 5542, 5542, 6927, 831...","[0.015505511, 0.62215865, 1.1086441, 1.3373504...","[-1.3505517, -0.8568693, -0.33960325, 0.141501...","[-1.7142574, -1.1298128, -0.101937994, 0.48760...","[-2.3216858, -1.907385, -1.5471234, -1.4290377...","[-2.3028033, -2.0405018, -1.7475417, -1.429032...",20230617_115521_standard_comp_to_omission_D1_s...,"[3, 4, 5, 6, 17, 28, 29, 39, 48, 52, 53, 54, 5...","[[673223, 2367433, 2569212, 3969647, 3973243, ..."
1,1.1,2,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,20230617_115521_standard_comp_to_omission_D1_s...,"(1, 39500)","[[1029, 1229], [3425, 3624], [5721, 5920], [75...","[[102, 105], [110, 148], [193, 199], [200, 202...","[29.623370221193532, 10.85380707317621]","[30.23007505102494, 34.598706975722735]",...,"[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 1500, 2770, 4156, 5542, 5542, 6927, 831...","[0.6574679, 0.11276958, -0.3659692, -0.3723524...","[0.7847764, 0.6408233, 0.48410022, 0.30299798,...","[0.5454766, 0.5454766, 0.5228427, 0.518316, 0....","[0.19785033, -0.25780496, -0.2787891, 0.119909...","[-0.62384546, -0.897785, -1.1164762, -0.982959...",20230617_115521_standard_comp_to_omission_D1_s...,"[16, 17, 20, 25, 26, 34, 35, 39, 40, 42, 43, 4...","[[958, 5233, 6331, 7475, 8599, 10550, 23459, 2..."
2,1.4,2,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,"(1, 32316)","[[1098, 1297], [3492, 3693], [5788, 5988], [75...","[[788, 835], [838, 839], [839, 871], [871, 900...","[28.94954695212123, 11.559275766924861]","[29.794023445649177, 35.275466614423074]",...,"[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 2770, 2770, 4155, 5541, 6927, 6927, 831...","[0.31349525, 0.40279388, 0.41229373, 0.3039953...","[0.35449925, 0.8595823, 1.0148718, 0.86899376,...","[0.6661627, 0.8376996, 0.9243008, 1.0025749, 0...","[0.6916378, 0.96981716, 0.9088464, 0.7583246, ...","[0.3162679, 0.49376523, 0.4066302, 0.13715701,...",20230618_100636_standard_comp_to_omission_D2_s...,"[7, 11, 12, 48, 50, 51, 61, 64, 76, 84, 90, 98...","[[506, 1419, 2358, 5104, 6785, 7620, 9801, 120..."
3,1.1,2,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,"(1, 32316)","[[1098, 1297], [3492, 3693], [5788, 5988], [75...","[[788, 835], [838, 839], [839, 871], [871, 900...","[28.94954695212123, 11.559275766924861]","[29.794023445649177, 35.275466614423074]",...,"[1400, 1420, 1440, 1460, 1480, 1500, 1520, 154...","[1384, 2770, 2770, 4155, 5541, 6927, 6927, 831...","[1.8754106, 0.06580388, -1.1351169, 0.23031357...","[2.2451546, 1.6649991, 1.3395461, 1.8442342, 1...","[2.131642, 1.495331, 1.0403687, 1.6066855, 1.8...","[0.6786791, -0.12149195, -0.31839266, 0.494346...","[0.7159058, -0.05324919, -0.6389903, 0.0, 0.44...",20230618_100636_standard_comp_to_omission_D2_s...,"[9, 19, 65, 83, 87, 93, 99, 103, 105, 106, 118...","[[458431, 506645, 509846, 545481, 571000, 6011..."
4,1.2,2,20230620_114347_standard_comp_to_omission_D4_s...,20230620_114347_standard_comp_to_omission_D4_s...,20230620_114347_standard_comp_to_omission_D4_s...,"(2027, 32239)","[[1058, 1265], [3460, 3659], [5756, 5955], [75...","[[6, 29], [32, 40], [168, 183], [197, 218], [3...","[28.726306016601946, 10.842459364362139]","[30.091010784999508, 34.98257973535351]",...,"[2022000, 2022020, 2022040, 2022060, 2022080, ...","[2021982, 2023278, 2023368, 2024753, 2026139, ...","[0.96733546, 0.90309834, 0.82374656, 0.7462842...","[-0.43027794, -0.39102963, -0.3735859, -0.4099...","[1.2360266, 1.2037544, 1.0714383, 0.677717, 0....","[0.49828878, 0.45471218, 0.39976782, 0.2576702...","[0.5827334, 0.48292822, 0.37990353, 0.31873262...",20230620_114347_standard_comp_to_omission_D4_s...,"[2, 22, 64, 89, 98, 100, 119, 121, 122, 126]","[[264055, 325873, 327931, 330996, 334463, 3590..."


In [40]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["average_firing_rate"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["spike_times"].apply(lambda x: len(x) / (x[-1] - x[0]))

KeyError: ''

In [36]:
raised ValueError("")

SyntaxError: invalid syntax (237021957.py, line 1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["spike_clusters"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["recording"].map(grouped_df.groupby('recording_name')['spike_clusters'].apply(lambda x: list(x)).to_dict())
VIDEO_TO_FRAME_AND_SUBJECT_DF["spike_times"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["recording"].map(grouped_df.groupby('recording_name')['spike_times'].apply(lambda x: list(x)).to_dict())

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["spike_times"].iloc[0]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.columns

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["recording"].iloc[0]

In [None]:
1215337/20000

In [None]:
all_spike_time_df["recording_name"].unique()

In [None]:
all_spike_time_df["spike_times"].min()

In [None]:
all_spike_time_df["spike_times"].max()

In [None]:
all_spike_time_df["recording_name"].unique()

In [None]:
example_spike_times = all_spike_time_df[(all_spike_time_df["recording_name"] == '20230618_100636_standard_comp_to_omission_D2_subj_1_4_t4b3L_box1_merged') & (all_spike_time_df["spike_clusters"] == 65)]["spike_times"]

In [None]:
example_average_fr, example_average_timestamps = calculate_rolling_avg_firing_rate(example_spike_times.values, 2000, 2000)

In [None]:
example_average_fr[:10]

In [None]:
raise ValueError("Stop here")

# Reading in the h5 files between recordings

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.head()

- Getting the names of each subject

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"].iloc[0]

In [None]:
# VIDEO_TO_FRAME_AND_SUBJECT_DF["video_path"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"].apply(lambda x: os.path.join(SLEAP_DIR, "*", x + "*.h5"))
VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_glob"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["video_name"].apply(lambda x: glob.glob(os.path.join(SLEAP_DIR, "*", x + "*2_subj*.h5")))
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF[VIDEO_TO_FRAME_AND_SUBJECT_DF['sleap_glob'].apply(lambda x: len(x) == 1)]
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF.reset_index(drop=True)


VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_path"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_glob"].apply(lambda x: x[0])

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["all_sleap_data"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_path"].apply(lambda x: sleap.process_pose.extract_sleap_data(x))


In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["all_sleap_data"].apply(lambda x: x["locations"])

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["track_names"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["all_sleap_data"].apply(lambda x: x["track_names"])

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["track_names"]

In [None]:
# Getting the coordinates of all the body parts for all the animals for the entire recording
VIDEO_TO_FRAME_AND_SUBJECT_DF["locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_path"].apply(lambda x: sleap.process_pose.get_sleap_tracks_from_h5(x))
# Getting the name of the tracks which correspond to the animal id
VIDEO_TO_FRAME_AND_SUBJECT_DF["track_names"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_path"].apply(lambda x: sleap.process_pose.get_sleap_track_names_from_h5(x))

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["locations"].iloc[0].shape

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["track_names"].iloc[0]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["track_names"]

In [None]:
# Getting the indexes of each subject from the track list
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_index"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {k: x["track_names"].index(k) for k in x["all_subjects"] if k in x["track_names"]}, axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_index"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_tracks"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {k:v for k, v in x["subject_to_index"].items()}, axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_tracks"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_tracks"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {k: x["locations"][:,:,:,v] for k, v in x["subject_to_index"].items()}, axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_tracks"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.head()

## Getting the coordinates of the corners

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_path"].iloc[0]

In [None]:
# Each corner file is the in the same folder and has the same basename of the pose tracking file 
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_path"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_path"].apply(lambda x: x.replace("id_corrected.h5", "corner.h5").replace(".fixed", "").replace(".round_1", "").replace(".1_subj", "").replace(".2_subj", ""))


In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_path"].iloc[0]

In [None]:
# Getting the indexes of each corner location
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_parts"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_path"].apply(lambda x: sleap.process_pose.get_node_names_from_sleap(x))

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_parts"]

In [None]:
# Getting the coordinates of all the corners
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_path"].apply(lambda x: sleap.process_pose.get_sleap_tracks_from_h5(x))

In [None]:
# Parsing out each corner and creating a dictionary of name to coordinates
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {part: x["corner_to_coordinate"][:,index,:,:] for index, part in enumerate(x["corner_parts"])}, axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"]

In [None]:
# Filtering out all the Nans because there's only one labeled frame
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {k: v[~np.isnan(v)][:2] for k, v in x["corner_to_coordinate"].items()}, axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"].iloc[0]

# Getting the distances between corners

- Getting the average width and height so that we can convert pixels to cm

In [None]:
# Using the x-coordinates for the width
VIDEO_TO_FRAME_AND_SUBJECT_DF["bottom_width"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_right"][0] - x["box_bottom_left"][0])
VIDEO_TO_FRAME_AND_SUBJECT_DF["top_width"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"].apply(lambda x: x["box_top_right"][0] - x["box_top_left"][0])


In [None]:
# Using the y-coordinates for the height
VIDEO_TO_FRAME_AND_SUBJECT_DF["right_height"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_right"][1] - x["box_top_right"][1])
VIDEO_TO_FRAME_AND_SUBJECT_DF["left_height"] = VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"].apply(lambda x: x["box_bottom_left"][1] - x["box_top_left"][1])


In [None]:
# averaging the width and height by adding both sides and then getting the mean
VIDEO_TO_FRAME_AND_SUBJECT_DF["average_height"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda row: (row["right_height"] + row["left_height"])/2, axis=1)
VIDEO_TO_FRAME_AND_SUBJECT_DF["average_width"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda row: (row["bottom_width"] + row["top_width"])/2, axis=1)

- Getthing the pixel to cm ratio

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["width_ratio"] = MED_PC_WIDTH / VIDEO_TO_FRAME_AND_SUBJECT_DF["average_width"]
VIDEO_TO_FRAME_AND_SUBJECT_DF["height_ratio"] = MED_PC_HEIGHT / VIDEO_TO_FRAME_AND_SUBJECT_DF["average_height"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["height_ratio"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["width_ratio"]

## Converting Pixels to cm

- Converting the X-dimension

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_to_tracks"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["rescaled_locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {key: sleap.process_pose.fill_missing(sleap.process_pose.rescale_dimension_in_array(value, dimension=0, ratio=x["width_ratio"])) for key, value in x["subject_to_tracks"].items()}, axis=1)

- Converting the Y-dimension

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["rescaled_locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: {key: sleap.process_pose.rescale_dimension_in_array(value, dimension=1, ratio=x["height_ratio"]) for key, value in x["rescaled_locations"].items()}, axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF

In [None]:
# Normalize dictionary column
normalized = pd.json_normalize(VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_to_coordinate"])

In [None]:
normalized

In [None]:


# Drop the original column and concat the normalized DataFrame
VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.concat([VIDEO_TO_FRAME_AND_SUBJECT_DF.drop(["corner_to_coordinate"], axis=1), normalized], axis=1)


In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.head()

In [None]:
for corner in VIDEO_TO_FRAME_AND_SUBJECT_DF["corner_parts"].iloc[0]:
    VIDEO_TO_FRAME_AND_SUBJECT_DF[corner] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: [x[corner][0]*x["width_ratio"], x[corner][1]*x["height_ratio"]], axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF

## Looking over the tracks

In [None]:
FILE_INDEX = 0

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_path"].iloc[FILE_INDEX]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["rescaled_locations"]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject"]

In [None]:
with h5py.File(VIDEO_TO_FRAME_AND_SUBJECT_DF["sleap_path"].iloc[FILE_INDEX], "r") as f:
    dset_names = list(f.keys())
    current_subject = VIDEO_TO_FRAME_AND_SUBJECT_DF["subject"].iloc[FILE_INDEX]
    locations = VIDEO_TO_FRAME_AND_SUBJECT_DF["rescaled_locations"].iloc[FILE_INDEX][current_subject]
    node_names = [n.decode() for n in f["node_names"][:]]
    
print("===HDF5 datasets===")
print(dset_names)
print()

print("===locations data shape===")
print(locations.shape)
print()

print("===nodes===")
for i, name in enumerate(node_names):
    print(f"{i}: {name}")
print()

In [None]:
thorax_loc = locations[:, THORAX_INDEX, :]

In [None]:
fig, ax = plt.subplots()

plt.plot(thorax_loc[:,0],label='X-coordinates')
# Converting to negative so that we can see both x and y track
plt.plot(-1*thorax_loc[:,1], label='Y-coordinates')

plt.legend(loc="center right")
plt.title('Thorax locations')
plt.xlabel("Time in frames")
plt.ylabel("Coordinate Position")

In [None]:
plt.figure(figsize=(7,7))
plt.plot(thorax_loc[:,0],thorax_loc[:,1])


plt.title('Thorax tracks')
plt.xlabel("X-Coordinates")
plt.ylabel("Y-Coordinates")


## Creating an individual column for each pose tracking

In [None]:
# VIDEO_TO_FRAME_AND_SUBJECT_DF["agent"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: list(set(x["all_subjects"]) - set(x["subject"]))[0], axis=1)

VIDEO_TO_FRAME_AND_SUBJECT_DF["agent"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: list((set(x["all_subjects"]) - set([x["subject"]])))[0], axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: x["rescaled_locations"][x["subject"]] , axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["agent_locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: x["rescaled_locations"][x["agent"]] , axis=1)

## Removing unnecessary columns

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF.drop(["sleap_glob", "subject_to_index", "subject_to_tracks", "corner_parts", "corner_to_coordinate", "bottom_width", "top_width", "right_height", "left_height", "average_height", "average_width", "width_ratio", "height_ratio", 'locations', 'current_subject', 'track_names', 'sleap_path', 'corner_path', 'all_sleap_data', 'rescaled_locations'], errors="ignore", axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.columns

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.head()

# Adding the start/stop frame information

In [None]:
START_STOP_FRAME_DF.head()

- Getting relevant metadata for each video

In [None]:
# Getting all the rows that have two subjects
START_STOP_FRAME_DF["tracked_subject"] = START_STOP_FRAME_DF["tracked_subject"].apply(lambda x: str(x).split("_"))
START_STOP_FRAME_DF = START_STOP_FRAME_DF[START_STOP_FRAME_DF["tracked_subject"].apply(lambda x: len(x) == 2)]

In [None]:
# Getting the sleap filename from file path
START_STOP_FRAME_DF["sleap_filename"] = START_STOP_FRAME_DF["file_path"].apply(lambda x: os.path.basename(x))

In [None]:
# Getting the sleap fileroot from the sleap filename
START_STOP_FRAME_DF["sleap_fileroot"] = START_STOP_FRAME_DF["sleap_filename"].apply(lambda x: ".".join(x.split(".")[0:2]))

In [None]:
# Combining the start and stop frame columns into a tuple
START_STOP_FRAME_DF["start_stop_frame"] = START_STOP_FRAME_DF.apply(lambda x: (int(x["start_frame"]), int(x["stop_frame"])), axis=1)
START_STOP_FRAME_DF = START_STOP_FRAME_DF.drop(columns=["start_frame", "stop_frame"], errors="ignore")

- Merging the dataframes based on shared SLEAP file basename

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF = pd.merge(left=VIDEO_TO_FRAME_AND_SUBJECT_DF, right=START_STOP_FRAME_DF, left_on="video_name", right_on="sleap_fileroot", how="left")

- Converting the start/stop frames into timestamps

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["start_stop_timestamps"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: (x["video_timestamps"][x["start_stop_frame"][0]], x["video_timestamps"][x["start_stop_frame"][1]]), axis=1)

## Going from frame information to ephys

In [None]:
for trace_col in [col for col in VIDEO_TO_FRAME_AND_SUBJECT_DF.columns if "lfp_trace" in col]:
    print(trace_col)
    brain_region = trace_col.split("_")[0]
    VIDEO_TO_FRAME_AND_SUBJECT_DF["filtered_{}_trace".format(brain_region)] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(x["start_stop_timestamps"][0], x["start_stop_timestamps"][1], x["lfp_timestamps"], x[trace_col])[1], axis=1)
VIDEO_TO_FRAME_AND_SUBJECT_DF["filtered_lfp_timestamps"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(x["start_stop_timestamps"][0], x["start_stop_timestamps"][1], x["lfp_timestamps"], x["lfp_timestamps"])[0], axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF.drop(columns=[col for col in VIDEO_TO_FRAME_AND_SUBJECT_DF.columns if "lfp_trace" in col], errors="ignore")
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF.drop(columns=["lfp_timestamps"], errors="ignore")

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.head()

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["video_timestamps"].iloc[0]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["video_timestamps"].apply(lambda x: x.shape)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_locations"].apply(lambda x: x.shape)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["recording"].iloc[5]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["filtered_subject_locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(x["start_stop_timestamps"][0], x["start_stop_timestamps"][1], x["video_timestamps"], x["subject_locations"])[1], axis=1)
VIDEO_TO_FRAME_AND_SUBJECT_DF["filtered_agent_locations"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(x["start_stop_timestamps"][0], x["start_stop_timestamps"][1], x["video_timestamps"], x["agent_locations"])[1], axis=1)
VIDEO_TO_FRAME_AND_SUBJECT_DF["filtered_video_timestamps"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(x["start_stop_timestamps"][0], x["start_stop_timestamps"][1], x["video_timestamps"], x["video_timestamps"])[0], axis=1)

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF.drop(columns=["video_timestamps", "subject_locations", "agent_locations"], errors="ignore")

In [None]:
# Sorting column names for easier reading
sorted_columns = sorted(VIDEO_TO_FRAME_AND_SUBJECT_DF.columns, key=lambda x: x.split("_")[-1])

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF = VIDEO_TO_FRAME_AND_SUBJECT_DF[sorted_columns].copy()

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.columns

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF

In [None]:
raise ValueError()

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["start_stop_lfp_mask"] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: np.logical_and(x["lfp_timestamps"] > x["start_stop_timestamps"][0], x["lfp_timestamps"] < x["start_stop_timestamps"][1]), axis=1)
                                                                                           


In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["start_stop_lfp_mask"].iloc[0]

In [None]:
for trace_col in [col for col in VIDEO_TO_FRAME_AND_SUBJECT_DF.columns if "lfp_trace" in col]:
    VIDEO_TO_FRAME_AND_SUBJECT_DF["".format(trace_col)] = VIDEO_TO_FRAME_AND_SUBJECT_DF.apply(lambda x: x[trace_col][x["start_stop_lfp_mask"]], axis=1)"]:


In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["start_stop_mpfc_lfp_trace"].iloc[0].shape

In [None]:
1617949/1000

In [None]:
31159889/20000

- Using the start and stop of each subject to remove all other frames where the subject(s) are not there

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["subject_locations"].iloc[0].shape

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["video_timestamps"].iloc[0].shape

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF.columns

In [None]:
def find_nearest_timestamp_indices(timestamps, other_timestamps, start_index=0, stop_index=1):
    """
    Converts the start and stop indices of one data stream to timestamps, and then finds the nearest start and stop 
    timestamps in another data stream.

    Parameters:
    - timestamps (list[int or float]): The list of timestamps in the first data stream.
    - other_timestamps (list[int or float]): The list of timestamps in the other data stream.
    - start_index (int, optional): The start index in the first data stream. Defaults to 0.
    - stop_index (int, optional): The stop index in the first data stream. Defaults to 1.

    Returns:
    - tuple: The indices of the nearest start and stop timestamps in the other data stream.
    """
    # Convert start and stop indices to timestamps
    start_timestamp = timestamps[start_index]
    stop_timestamp = timestamps[stop_index]

    # Find nearest start and stop timestamps in other data stream
    nearest_start_index = utilities.helper.find_nearest_index(other_timestamps, start_timestamp)
    nearest_stop_index = utilities.helper.find_nearest_index(other_timestamps, stop_timestamp) 

    return nearest_start_index, nearest_stop_index

In [None]:
convert_index_to_timestamp(0, 100, VIDEO_TO_FRAME_AND_SUBJECT_DF["video_timestamps"].iloc[0], VIDEO_TO_FRAME_AND_SUBJECT_DF["lfp_timestamps"].iloc[0])

In [None]:
start_frame = VIDEO_TO_FRAME_AND_SUBJECT_DF['port_entry_frames'].iloc[0][0][-1]

In [None]:
start_frame

In [None]:
start_timestamp = VIDEO_TO_FRAME_AND_SUBJECT_DF['video_timestamps'].iloc[0][0]

In [None]:
start_timestamp

In [None]:
start_lfp = utilities.helper.find_nearest_index(VIDEO_TO_FRAME_AND_SUBJECT_DF["lfp_timestamps"].iloc[0], start_timestamp)

In [None]:
start_lfp

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["lfp_timestamps"].iloc[0][start_lfp]

In [None]:
VIDEO_TO_FRAME_AND_SUBJECT_DF["bla_lfp_trace"].iloc[0].shape