# SLEAP Distance Calculation

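Merges the SLEAP pose-clustering table with the LFP spectral table on video name and subject, attaches a video timestamp to every clustered frame, and derives per-trial ranges of consecutive frames that share a k-means cluster.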

In [1]:
import os
import glob
import git
import sys
from collections import defaultdict

In [2]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
# import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import h5py


In [3]:
# Find the enclosing git repository (searching parent directories) and ask git for its top-level directory.
git_repo = git.Repo(".", search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")

In [4]:
git_root

'/nancy/user/riwata/projects/reward_comp_ext'

In [5]:
# Put <repo root>/src at the front of the import path so modules located there are found first.
sys.path.insert(0, os.path.join(git_root, 'src'))

In [6]:
import sleap.process_pose

In [7]:
# sns.set('notebook', 'ticks', font_scale=1.2)
# Default size applied to every matplotlib figure created after this cell.
mpl.rcParams['figure.figsize'] = [15,6]

# Functions

In [8]:
def combine_dicts(dicts):
    """
    Merge several dictionaries of lists into one dictionary of concatenated lists.

    Every key seen in any input dictionary appears in the result. Its value is the
    concatenation of that key's lists, taken in the order the dictionaries are given.
    The input dictionaries and their lists are not modified.

    Parameters:
        dicts (list of dict): Dictionaries whose values are lists (any iterable works).

    Returns:
        dict: A plain dictionary mapping each key to a new list holding all of the
        items found under that key across the inputs.
    """
    merged = {}
    for mapping in dicts:
        for key in mapping:
            # setdefault creates the list the first time a key is seen.
            merged.setdefault(key, []).extend(mapping[key])
    return merged


# Example usage: 'a' and 'b' occur in several dictionaries, 'c' in only one.
list_of_dicts = [
    {'a': [1, 2], 'b': [3, 4]},
    {'a': [5], 'b': [6, 7]},
    {'a': [8, 9], 'c': [10]}
]

combined_dict = combine_dicts(list_of_dicts)
print(combined_dict)

{'a': [1, 2, 5, 8, 9], 'b': [3, 4, 6, 7], 'c': [10]}


In [9]:
def find_consecutive_ranges(numbers, min_length=1):
    """
    Locate runs of identical consecutive values and report where each run starts and ends.

    The sequence is scanned once from left to right. A run ends as soon as the next
    element compares unequal (`!=`) to the run's value. Runs shorter than `min_length`
    are discarded.

    Parameters:
        numbers (list): Sequence of values (typically integers) to scan.
        min_length (int): Smallest number of elements a run must contain to be reported.

    Returns:
        dict: Maps each value to a list of (start, end) index tuples, both inclusive,
              one tuple per qualifying run, in order of appearance. An empty input
              gives an empty dictionary.
    """
    found = {}
    total = len(numbers)
    if total == 0:
        return found

    def _keep_run(value, first, last):
        # Record the run only when it spans at least `min_length` elements.
        if last - first + 1 >= min_length:
            found.setdefault(value, []).append((first, last))

    run_value = numbers[0]
    run_start = 0
    for position in range(1, total):
        if numbers[position] != run_value:
            # The value changed: close the run that ended at the previous index.
            _keep_run(run_value, run_start, position - 1)
            run_value = numbers[position]
            run_start = position

    # The final run has no following element to close it, so close it here.
    _keep_run(run_value, run_start, total - 1)
    return found

# Example usage: only the runs of at least three equal values are reported.
numbers = [1, 1, 2, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1]
print(find_consecutive_ranges(numbers, min_length=3))

{2: [(2, 4)], 3: [(5, 8)]}


In [10]:
def update_tuples_in_dict(original_dict, reference_list):
    """
    Replace every index stored in a dictionary of tuple lists with the item found at that index.

    Each value of `original_dict` is a list of tuples of indices. A new dictionary with the
    same keys is built in which every index `i` has been swapped for `reference_list[i]`.
    The input dictionary is left untouched.

    Parameters:
        original_dict (dict): Maps keys to lists of tuples; each tuple holds indices into `reference_list`.
        reference_list (list): Indexable collection the indices refer to.

    Returns:
        dict: Same keys as `original_dict`; each value is a list of tuples holding the
              looked-up elements instead of the indices.
    """
    def _lookup(index_tuple):
        # Resolve one tuple of indices into a tuple of referenced items.
        return tuple(reference_list[position] for position in index_tuple)

    return {
        key: [_lookup(index_tuple) for index_tuple in index_tuples]
        for key, index_tuples in original_dict.items()
    }

# Example usage: every index in the tuples is replaced by the item at that position of reference_list.
original_dict = {
    'a': [(0, 1), (2, 3)],
    'b': [(1, 3), (0, 2)]
}
reference_list = ['alpha', 'beta', 'gamma', 'delta']

updated_dict = update_tuples_in_dict(original_dict, reference_list)
print(updated_dict)

{'a': [('alpha', 'beta'), ('gamma', 'delta')], 'b': [('beta', 'delta'), ('alpha', 'gamma')]}


## Inputs & Data

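- `LFP_SPECTRAL_DF`: the spectral-band table read from `./proc/{OUTPUT_PREFIX}_03_spectral_bands.pkl`.
- `SLEAP_DF`: the SLEAP clustering table read from `../2024_06_26_sleap_clustering/proc/grouped_exploded_columns.pkl`.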

In [11]:
# Prefix shared by the names of the pickle files this notebook reads from and writes to ./proc.
OUTPUT_PREFIX = "rce_pilot_3_long_comp"

In [12]:
# Inputs and required data loading
# Input variable names are in all-caps snake case.
# Whenever an input changes or is used for processing,
# the derived variables are in lower snake case.

# Spectral-band table for the dataset identified by OUTPUT_PREFIX.
LFP_SPECTRAL_DF = pd.read_pickle("./proc/{}_03_spectral_bands.pkl".format(OUTPUT_PREFIX))
# LFP_SPECTRAL_DF = pd.read_pickle("./proc/{}_01_lfp_traces_and_frames.pkl".format(OUTPUT_PREFIX))



In [13]:
# SLEAP_DF = pd.read_pickle("/blue/npadillacoreano/ryoi360/projects/reward_comp/repos/reward_comp_ext/results/2024_06_26_sleap_clustering/proc/grouped_exploded_columns.pkl")
# SLEAP clustering table, read via a path relative to this notebook's directory
# (the commented-out line above is the same file addressed by an absolute path).
SLEAP_DF = pd.read_pickle("../2024_06_26_sleap_clustering/proc/grouped_exploded_columns.pkl")

In [14]:
SLEAP_DF.head()

Unnamed: 0,video_name,current_subject,frame_index,tone_start_frame,tone_stop_frame,condition,competition_closeness,notes,10s_before_tone_frame,10s_after_tone_frame,...,subject_nose,subject_tail_base,agent_thorax,agent_nose,agent_tail_base,session_dir,experiment,sleap_name,video_id,agent
0,20240317_151922_long_comp_subj_3-1_and_3-3.1,3.1,"[1086, 1087, 1088, 1089, 1090, 1091, 1092, 109...","[1286, 1286, 1286, 1286, 1286, 1286, 1286, 128...","[1486, 1486, 1486, 1486, 1486, 1486, 1486, 148...","[3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, ...","[Subj 2 then Subj 1, Subj 2 then Subj 1, Subj ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1086, 1086, 1086, 1086, 1086, 1086, 1086, 108...","[1686, 1686, 1686, 1686, 1686, 1686, 1686, 168...",...,"[[60.14673983110819, 38.0809500426292], [60.14...","[[65.86894804065032, 31.8436244490543], [65.86...","[[51.04336785027878, 11.609328010205985], [51....","[[43.139588993359475, 13.385259646809214], [43...","[[54.49241922411595, 12.722769593270066], [54....",20240317_151922_long_comp_subj_3-1_and_3-3,long_comp,20240317_151922_long_comp_subj_3-1_and_3-3.1.f...,5,3.3
1,20240317_151922_long_comp_subj_3-1_and_3-3.1,3.3,"[1086, 1087, 1088, 1089, 1090, 1091, 1092, 109...","[1286, 1286, 1286, 1286, 1286, 1286, 1286, 128...","[1486, 1486, 1486, 1486, 1486, 1486, 1486, 148...","[3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, ...","[Subj 2 then Subj 1, Subj 2 then Subj 1, Subj ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1086, 1086, 1086, 1086, 1086, 1086, 1086, 108...","[1686, 1686, 1686, 1686, 1686, 1686, 1686, 168...",...,"[[43.139588993359475, 13.385259646809214], [43...","[[54.49241922411595, 12.722769593270066], [54....","[[64.11036004541992, 34.137273635147906], [64....","[[60.14673983110819, 38.0809500426292], [60.14...","[[65.86894804065032, 31.8436244490543], [65.86...",20240317_151922_long_comp_subj_3-1_and_3-3,long_comp,20240317_151922_long_comp_subj_3-1_and_3-3.1.f...,5,3.1
2,20240317_172017_long_comp_subj_4-2_and_4-3.1,4.2,"[1100, 1101, 1102, 1103, 1104, 1105, 1106, 110...","[1300, 1300, 1300, 1300, 1300, 1300, 1300, 130...","[1501, 1501, 1501, 1501, 1501, 1501, 1501, 150...","[4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, ...","[Subj 1 then Subj 2, Subj 1 then Subj 2, Subj ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1100, 1100, 1100, 1100, 1100, 1100, 1100, 110...","[1701, 1701, 1701, 1701, 1701, 1701, 1701, 170...",...,"[[65.23110812441777, 8.220414676017084], [62.5...","[[72.8621368084351, 13.53665765315761], [72.41...","[[57.3449526902064, 32.078917344899914], [57.5...","[[60.79884848875681, 35.963595797483414], [60....","[[56.32709801682682, 29.966639024342353], [56....",20240317_172017_long_comp_subj_4-2_and_4-3,long_comp,20240317_172017_long_comp_subj_4-2_and_4-3.1.f...,6,4.3
3,20240317_172017_long_comp_subj_4-2_and_4-3.1,4.3,"[1100, 1101, 1102, 1103, 1104, 1105, 1106, 110...","[1300, 1300, 1300, 1300, 1300, 1300, 1300, 130...","[1501, 1501, 1501, 1501, 1501, 1501, 1501, 150...","[4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, ...","[Subj 1 then Subj 2, Subj 1 then Subj 2, Subj ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1100, 1100, 1100, 1100, 1100, 1100, 1100, 110...","[1701, 1701, 1701, 1701, 1701, 1701, 1701, 170...",...,"[[60.79884848875681, 35.963595797483414], [60....","[[56.32709801682682, 29.966639024342353], [56....","[[72.18478668206376, 10.328978616940205], [71....","[[65.23110812441777, 8.220414676017084], [62.5...","[[72.8621368084351, 13.53665765315761], [72.41...",20240317_172017_long_comp_subj_4-2_and_4-3,long_comp,20240317_172017_long_comp_subj_4-2_and_4-3.1.f...,6,4.2
4,20240318_143819_long_comp_subj_3-3_and_3-4.1,3.3,"[1134, 1135, 1136, 1137, 1138, 1139, 1140, 114...","[1334, 1334, 1334, 1334, 1334, 1334, 1334, 133...","[1533, 1533, 1533, 1533, 1533, 1533, 1533, 153...","[temp, temp, temp, temp, temp, temp, temp, tem...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1134, 1134, 1134, 1134, 1134, 1134, 1134, 113...","[1733, 1733, 1733, 1733, 1733, 1733, 1733, 173...",...,"[[74.05712670191413, 17.15715452958501], [74.0...","[[69.55312088481946, 25.429423623129917], [71....","[[65.57930503887175, 33.84569047638807], [65.3...","[[58.01061366578415, 33.98924535992776], [58.0...","[[68.06216599593583, 32.37245454635553], [67.8...",20240318_143819_long_comp_subj_3-3_and_3-4,long_comp,20240318_143819_long_comp_subj_3-3_and_3-4.1.f...,7,3.4


## Outputs

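Both outputs are data (pickled DataFrames written to `./proc/`); the notebook produces no plots.

- `{OUTPUT_PREFIX}_04_spectral_and_sleap.pkl`: the SLEAP table merged with the LFP spectral table, plus a `cluster_timestamp` column holding the video timestamp of every clustered frame.

- `{OUTPUT_PREFIX}_cluster_ranges.pkl`: one row per video and subject, with dictionaries of the index, time and timestamp ranges of each consecutive k-means cluster run.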

In [15]:
# Output location
# Constant names are in all-caps snake case.
# Whenever an input changes or is used for processing,
# the derived variables are in lower snake case.
OUTPUT_DIR = r"./proc/" # where data is saved should always be shown in the inputs
# Create the output directory up front; exist_ok avoids an error when it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [16]:
# File name (without directory) under which the merged spectral + SLEAP table is saved.
FULL_LFP_TRACES_PKL = f"{OUTPUT_PREFIX}_04_spectral_and_sleap.pkl"

## Processing

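The two input tables are merged on video name and subject, every clustered frame is given its video timestamp, the per-frame columns are exploded to one row per frame and regrouped by trial, and the ranges of consecutive frames that share a cluster are collected per trial and then per video and subject.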

## Merging the dataframes based on shared video name

In [17]:
# Drop the exact ".videoTimeStamps.cameraHWSync" suffix from each video name so the names
# match the ones used in SLEAP_DF.
# str.strip() is not used here: its argument is a *set of characters* removed from both
# ends of the string, so it could also eat legitimate leading or trailing characters of a
# name (any of ".videoTmSapscrHWyn"). Only a true suffix is removed; other names pass through.
LFP_SPECTRAL_DF["video_name"] = LFP_SPECTRAL_DF["video_name"].apply(
    lambda x, suffix=".videoTimeStamps.cameraHWSync": x[: -len(suffix)] if x.endswith(suffix) else x
)


In [18]:
LFP_SPECTRAL_DF.head()

Unnamed: 0,cohort,session_dir,tone_frames,box_1_port_entry_frames,box_2_port_entry_frames,video_name,notes,session_path,recording,current_subject,...,vHPC_power_theta,vHPC_power_gamma,BLA_power_theta,BLA_power_gamma,LH_power_theta,LH_power_gamma,MD_power_theta,MD_power_gamma,mPFC_power_theta,mPFC_power_gamma
0,rce_pilot_3,20240317_151922_long_comp_subj_3-1_and_3-3,"[[1, 88], [1286, 1486], [3282, 3482], [4480, 4...","[[1, 88], [259, 278], [310, 365], [366, 382], ...",[],20240317_151922_long_comp_subj_3-1_and_3-3.1,,/scratch/back_up/reward_competition_extention/...,20240317_151922_long_comp_subj_3-1_t6b6_merged,3.1,...,"[0.029989537, 0.05074093, 0.045202438, 0.03654...","[0.0021393485, 0.001990721, 0.0016591972, 0.00...","[0.01576374, 0.017160619, nan, nan, 0.01513487...","[0.0029858148, 0.0055226046, nan, nan, 0.00234...","[0.014374395, nan, nan, nan, nan, nan, 0.02792...","[0.0015033478, nan, nan, nan, nan, nan, 0.0011...","[0.015749235, nan, nan, 0.013588659, 0.0394984...","[0.0012602397, nan, nan, 0.0016373609, 0.00110...","[0.02549853, 0.01599767, 0.019950056, 0.041470...","[0.0018852353, 0.0019809369, 0.0013036462, 0.0..."
1,rce_pilot_3,20240317_151922_long_comp_subj_3-1_and_3-3,"[[2, 132], [1922, 2220], [4906, 5203], [6696, ...","[[2, 132], [387, 415], [463, 546], [548, 571],...",[],20240317_151922_long_comp_subj_3-1_and_3-3.2,,/scratch/back_up/reward_competition_extention/...,20240317_151922_long_comp_subj_3-1_t6b6_merged,3.1,...,"[0.029989537, 0.05074093, 0.045202438, 0.03654...","[0.0021393485, 0.001990721, 0.0016591972, 0.00...","[0.01576374, 0.017160619, nan, nan, 0.01513487...","[0.0029858148, 0.0055226046, nan, nan, 0.00234...","[0.014374395, nan, nan, nan, nan, nan, 0.02792...","[0.0015033478, nan, nan, nan, nan, nan, 0.0011...","[0.015749235, nan, nan, 0.013588659, 0.0394984...","[0.0012602397, nan, nan, 0.0016373609, 0.00110...","[0.02549853, 0.01599767, 0.019950056, 0.041470...","[0.0018852353, 0.0019809369, 0.0013036462, 0.0..."
2,rce_pilot_3,20240317_151922_long_comp_subj_3-1_and_3-3,"[[1, 88], [1286, 1486], [3282, 3482], [4480, 4...","[[1, 88], [259, 278], [310, 365], [366, 382], ...",[],20240317_151922_long_comp_subj_3-1_and_3-3.1,,/scratch/back_up/reward_competition_extention/...,20240317_151922_long_comp_subj_3-3_t5b5_merged,3.3,...,"[0.030139383, 0.038337562, 0.035997633, 0.0163...","[0.0013218134, 0.001314961, 0.0014802001, 0.00...","[0.035382226, 0.031998232, 0.029764285, 0.0189...","[0.0013072899, 0.0012992765, 0.0010765253, 0.0...","[0.03857511, 0.028973801, 0.029779362, 0.02279...","[0.0009867165, 0.0008933175, 0.00090869045, 0....","[0.01698081, 0.014525875, 0.015493691, 0.01093...","[0.0012375844, 0.0010923791, 0.0008535082, 0.0...","[0.020940535, 0.027591836, 0.029557036, 0.0211...","[0.0014317904, 0.0010592391, 0.0012028972, 0.0..."
3,rce_pilot_3,20240317_151922_long_comp_subj_3-1_and_3-3,"[[2, 132], [1922, 2220], [4906, 5203], [6696, ...","[[2, 132], [387, 415], [463, 546], [548, 571],...",[],20240317_151922_long_comp_subj_3-1_and_3-3.2,,/scratch/back_up/reward_competition_extention/...,20240317_151922_long_comp_subj_3-3_t5b5_merged,3.3,...,"[0.030139383, 0.038337562, 0.035997633, 0.0163...","[0.0013218134, 0.001314961, 0.0014802001, 0.00...","[0.035382226, 0.031998232, 0.029764285, 0.0189...","[0.0013072899, 0.0012992765, 0.0010765253, 0.0...","[0.03857511, 0.028973801, 0.029779362, 0.02279...","[0.0009867165, 0.0008933175, 0.00090869045, 0....","[0.01698081, 0.014525875, 0.015493691, 0.01093...","[0.0012375844, 0.0010923791, 0.0008535082, 0.0...","[0.020940535, 0.027591836, 0.029557036, 0.0211...","[0.0014317904, 0.0010592391, 0.0012028972, 0.0..."
4,rce_pilot_3,20240317_172017_long_comp_subj_4-2_and_4-3,"[[1, 102], [1300, 1501], [3297, 3497], [4494, ...","[[1, 102], [115, 153], [191, 245], [261, 263],...",[],20240317_172017_long_comp_subj_4-2_and_4-3.1,,/scratch/back_up/reward_competition_extention/...,20240317_172017_long_comp_subj_4-2_t6b6_merged,4.2,...,"[0.028422035, 0.01407221, 0.023395533, 0.02382...","[0.0036248525, 0.004009613, 0.0021644833, 0.00...","[0.045641977, 0.046155874, 0.029715423, 0.0361...","[0.0026258836, 0.0029411335, 0.004551022, 0.00...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[0.03796135, 0.033379465, 0.020905837, 0.03186...","[0.001795531, 0.0019260239, 0.0023288091, 0.00..."


In [19]:
# Drop the exact ".videoTimeStamps.cameraHWSync" suffix from each video name so both tables
# use the same key for the merge.
# str.strip() is not used here: its argument is a *set of characters* removed from both
# ends of the string, so it could also eat legitimate leading or trailing characters of a
# name (any of ".videoTmSapscrHWyn"). Only a true suffix is removed; other names pass through.
SLEAP_DF["video_name"] = SLEAP_DF["video_name"].apply(
    lambda x, suffix=".videoTimeStamps.cameraHWSync": x[: -len(suffix)] if x.endswith(suffix) else x
)


In [20]:
# Columns that exist only in the spectral table; columns present in both tables are taken
# from SLEAP_DF, so the merge produces no suffixed duplicates.
cols_to_use = LFP_SPECTRAL_DF.columns.difference(SLEAP_DF.columns)

# Inner join: keep only the (video, subject) pairs present in both tables.
merged_LFP_SPECTRAL_DF = SLEAP_DF.merge(
    LFP_SPECTRAL_DF[[*cols_to_use, "video_name", "current_subject"]],
    how="inner",
    on=["video_name", "current_subject"],
)

In [21]:
merged_LFP_SPECTRAL_DF.head()

Unnamed: 0,video_name,current_subject,frame_index,tone_start_frame,tone_stop_frame,condition,competition_closeness,notes,10s_before_tone_frame,10s_after_tone_frame,...,vHPC_gamma_band,vHPC_gamma_phase,vHPC_lfp_trace,vHPC_mPFC_granger_all_frequencies_all_windows,vHPC_power_all_frequencies_all_windows,vHPC_power_gamma,vHPC_power_theta,vHPC_theta_band,vHPC_theta_phase,video_timestamps
0,20240317_151922_long_comp_subj_3-1_and_3-3.1,3.1,"[1086, 1087, 1088, 1089, 1090, 1091, 1092, 109...","[1286, 1286, 1286, 1286, 1286, 1286, 1286, 128...","[1486, 1486, 1486, 1486, 1486, 1486, 1486, 148...","[3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, ...","[Subj 2 then Subj 1, Subj 2 then Subj 1, Subj ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1086, 1086, 1086, 1086, 1086, 1086, 1086, 108...","[1686, 1686, 1686, 1686, 1686, 1686, 1686, 168...",...,"[0.2201744, 0.29787034, 0.35982215, 0.40078652...","[-0.954227, -0.5681917, -0.37009934, -0.076165...","[-93.015, -87.75, -61.815, -29.445, -25.935, -...","[[0.45251086, 0.062478807, 0.47624385, 0.30874...","[[0.00589379, 0.0066235354, 0.008387087, 0.020...","[0.0021393485, 0.001990721, 0.0016591972, 0.00...","[0.029989537, 0.05074093, 0.045202438, 0.03654...","[-0.36557105, -0.35522795, -0.34353578, -0.330...","[2.4317799, 2.9768577, 3.0195692, -2.9837036, ...","[-2, 1384, 1384, 2770, 4156, 5542, 5542, 6928,..."
1,20240317_151922_long_comp_subj_3-1_and_3-3.1,3.3,"[1086, 1087, 1088, 1089, 1090, 1091, 1092, 109...","[1286, 1286, 1286, 1286, 1286, 1286, 1286, 128...","[1486, 1486, 1486, 1486, 1486, 1486, 1486, 148...","[3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, ...","[Subj 2 then Subj 1, Subj 2 then Subj 1, Subj ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1086, 1086, 1086, 1086, 1086, 1086, 1086, 108...","[1686, 1686, 1686, 1686, 1686, 1686, 1686, 168...",...,"[0.22105147, 0.22064163, 0.20793986, 0.1835019...","[-0.62403333, 0.03802512, 0.2023374, 0.5861103...","[63.569996, 69.615, 73.32, 57.914997, 37.05, 1...","[[0.001371691, 0.20976743, 0.17549993, 0.50632...","[[0.011929583, 0.01116798, 0.030497659, 0.0248...","[0.0013218134, 0.001314961, 0.0014802001, 0.00...","[0.030139383, 0.038337562, 0.035997633, 0.0163...","[0.092747614, 0.061942074, 0.030017471, -0.002...","[1.2483366, 1.4093654, 1.4967988, 1.5773847, 1...","[-2, 1384, 1384, 2770, 4156, 5542, 5542, 6928,..."
2,20240317_172017_long_comp_subj_4-2_and_4-3.1,4.2,"[1100, 1101, 1102, 1103, 1104, 1105, 1106, 110...","[1300, 1300, 1300, 1300, 1300, 1300, 1300, 130...","[1501, 1501, 1501, 1501, 1501, 1501, 1501, 150...","[4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, ...","[Subj 1 then Subj 2, Subj 1 then Subj 2, Subj ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1100, 1100, 1100, 1100, 1100, 1100, 1100, 110...","[1701, 1701, 1701, 1701, 1701, 1701, 1701, 170...",...,"[0.0987376, -0.040491626, -0.18328854, -0.3207...","[1.1787544, 1.6703464, 1.9961439, 2.2459297, 2...","[20.67, -22.035, -49.53, -35.879997, -41.34, -...","[[0.12891674, 1.1085438, 1.110659, 0.18731599,...","[[0.02371147, 0.0119896205, 0.04165479, 0.0294...","[0.0036248525, 0.004009613, 0.0021644833, 0.00...","[0.028422035, 0.01407221, 0.023395533, 0.02382...","[-0.3841577, -0.3746623, -0.36333248, -0.35014...","[2.6506236, -2.9641438, -2.8957667, -2.6281376...","[-2, 1384, 2770, 2770, 4156, 5542, 5542, 6928,..."
3,20240317_172017_long_comp_subj_4-2_and_4-3.1,4.3,"[1100, 1101, 1102, 1103, 1104, 1105, 1106, 110...","[1300, 1300, 1300, 1300, 1300, 1300, 1300, 130...","[1501, 1501, 1501, 1501, 1501, 1501, 1501, 150...","[4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, ...","[Subj 1 then Subj 2, Subj 1 then Subj 2, Subj ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1100, 1100, 1100, 1100, 1100, 1100, 1100, 110...","[1701, 1701, 1701, 1701, 1701, 1701, 1701, 170...",...,"[0.20125747, 0.25603184, 0.297664, 0.3218874, ...","[-0.8791572, -0.44574457, -0.25671825, 0.05544...","[103.155, 145.275, 186.42, 190.70999, 185.4449...","[[2.2130556, 0.80043894, 0.22213456, 0.2710376...","[[0.008084274, 0.006739571, 0.008228533, 0.025...","[0.0013482062, 0.0012150956, 0.0016119903, 0.0...","[0.050342575, 0.03379877, 0.029949626, 0.03911...","[0.422362, 0.42610937, 0.4286247, 0.42985302, ...","[-0.84279317, -0.4306993, -0.40252516, -0.1809...","[-2, 1384, 2770, 2770, 4156, 5542, 5542, 6928,..."
4,20240318_143819_long_comp_subj_3-3_and_3-4.1,3.3,"[1134, 1135, 1136, 1137, 1138, 1139, 1140, 114...","[1334, 1334, 1334, 1334, 1334, 1334, 1334, 133...","[1533, 1533, 1533, 1533, 1533, 1533, 1533, 153...","[temp, temp, temp, temp, temp, temp, temp, tem...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[1134, 1134, 1134, 1134, 1134, 1134, 1134, 113...","[1733, 1733, 1733, 1733, 1733, 1733, 1733, 173...",...,"[0.049426246, 0.083508745, 0.11384763, 0.13834...","[-1.1458093, -0.84176433, -0.61683136, -0.3612...","[-54.21, -60.254997, -66.494995, -64.155, -57....","[[0.678713, 0.37892807, 0.1396561, 0.028809266...","[[0.0035772505, 0.007254109, 0.008794605, 0.02...","[0.0016460003, 0.0016080565, 0.0019354661, 0.0...","[0.05727462, 0.03308555, 0.034518804, 0.043184...","[-0.5945928, -0.5781312, -0.5601761, -0.540729...","[2.5259705, 3.1337843, -3.0986528, -2.821297, ...","[-2, 1384, 1384, 2770, 2770, 4156, 5542, 5542,..."


## Getting the timestamps of each cluster frame

In [22]:
merged_LFP_SPECTRAL_DF["frame_index"].iloc[0]

array([ 1086,  1087,  1088, ..., 40511, 40512, 40513])

In [23]:
merged_LFP_SPECTRAL_DF["video_timestamps"].iloc[0]

array([      -2,     1384,     1384, ..., 45819618, 45819618, 45821004],
      dtype=int32)

In [24]:
# For each row, index the row's video_timestamps array with its frame_index array,
# which gives one video timestamp per clustered frame.
merged_LFP_SPECTRAL_DF["cluster_timestamp"] = merged_LFP_SPECTRAL_DF.apply(lambda x: x["video_timestamps"][x["frame_index"]], axis=1)


In [25]:
merged_LFP_SPECTRAL_DF["cluster_timestamp"].head()

0    [1087906, 1089292, 1089292, 1090678, 1092063, ...
1    [1087906, 1089292, 1089292, 1090678, 1092063, ...
2    [1101765, 1103151, 1104536, 1104536, 1105922, ...
3    [1101765, 1103151, 1104536, 1104536, 1105922, ...
4    [1100378, 1101764, 1103150, 1104536, 1104536, ...
Name: cluster_timestamp, dtype: object

- Checking if the cluster timestamp shape matches the actual number of clusters

In [26]:
merged_LFP_SPECTRAL_DF["cluster_timestamp"].apply(lambda x: x.shape).head()

0    (17987,)
1    (17987,)
2    (24593,)
3    (24593,)
4    (24578,)
Name: cluster_timestamp, dtype: object

In [27]:
merged_LFP_SPECTRAL_DF["kmeans_cluster"].head()

0    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2    [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, ...
3    [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, ...
4    [6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
Name: kmeans_cluster, dtype: object

In [28]:
merged_LFP_SPECTRAL_DF["kmeans_cluster"].apply(lambda x: x.shape).head()

0    (17987,)
1    (17987,)
2    (24593,)
3    (24593,)
4    (24578,)
Name: kmeans_cluster, dtype: object

In [29]:
FULL_LFP_TRACES_PKL

'rce_pilot_3_long_comp_04_spectral_and_sleap.pkl'

In [30]:
# Save the merged table (including cluster_timestamp) as OUTPUT_DIR/FULL_LFP_TRACES_PKL.
merged_LFP_SPECTRAL_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))


In [31]:
merged_LFP_SPECTRAL_DF.shape

(16, 196)

In [32]:
list(SLEAP_DF.columns)

['video_name',
 'current_subject',
 'frame_index',
 'tone_start_frame',
 'tone_stop_frame',
 'condition ',
 'competition_closeness',
 'notes',
 '10s_before_tone_frame',
 '10s_after_tone_frame',
 'subject_thorax_to_agent_thorax',
 'nose_to_reward_port_sum',
 'nose_to_reward_port_diff',
 'thorax_velocity_sum',
 'thorax_velocity_diff',
 'to_reward_port_angle_sum',
 'to_reward_port_angle_diff',
 'subject_nose_to_reward_port',
 'subject_thorax_velocity',
 'subject_to_reward_port_angle',
 'agent_nose_to_reward_port',
 'agent_thorax_velocity',
 'agent_to_reward_port_angle',
 'closebool_subject_nose_to_reward_port',
 'closebool_agent_nose_to_reward_port',
 'movingbool_subject_thorax_velocity',
 'movingbool_agent_thorax_velocity',
 'manual_cluster_id',
 'standard_embedding_x',
 'standard_embedding_y',
 'kmeans_cluster',
 'subject_locations',
 'agent_locations',
 'subject_thorax',
 'subject_nose',
 'subject_tail_base',
 'agent_thorax',
 'agent_nose',
 'agent_tail_base',
 'session_dir',
 'experim

In [33]:
merged_LFP_SPECTRAL_DF["recording"].unique()

array(['20240317_151922_long_comp_subj_3-1_t6b6_merged',
       '20240317_151922_long_comp_subj_3-3_t5b5_merged',
       '20240317_172017_long_comp_subj_4-2_t6b6_merged',
       '20240317_172017_long_comp_subj_4-3_t5b5_merged',
       '20240318_143819_long_comp_subj_3-3_t6b6_merged',
       '20240318_143819_long_comp_subj_3-4_t5b5_merged',
       '20240318_170933_long_comp_subj_4-3_t6b6_merged',
       '20240318_170933_long_comp_subj_4-4_t5b5_merged',
       '20240319_134914_long_comp_subj_3-1_t5b5_merged',
       '20240319_134914_long_comp_subj_3-4_t6b6_merged',
       '20240319_160457_long_comp_subj_4-2_t5b5_merged',
       '20240319_160457_long_comp_subj_4-4_t6b6_merged',
       '20240320_114629_long_comp_subj_5-3_t6b6_merged',
       '20240320_114629_long_comp_subj_5-4_t5b5_merged',
       '20240321_114851_long_comp_subj_5-2_t6b6_merged',
       '20240321_114851_long_comp_subj_5-3_t5b5_merged'], dtype=object)

In [34]:
merged_LFP_SPECTRAL_DF.columns

Index(['video_name', 'current_subject', 'frame_index', 'tone_start_frame',
       'tone_stop_frame', 'condition ', 'competition_closeness', 'notes',
       '10s_before_tone_frame', '10s_after_tone_frame',
       ...
       'vHPC_gamma_phase', 'vHPC_lfp_trace',
       'vHPC_mPFC_granger_all_frequencies_all_windows',
       'vHPC_power_all_frequencies_all_windows', 'vHPC_power_gamma',
       'vHPC_power_theta', 'vHPC_theta_band', 'vHPC_theta_phase',
       'video_timestamps', 'cluster_timestamp'],
      dtype='object', length=196)

# Finding the ranges of each cluster

In [35]:
for col in merged_LFP_SPECTRAL_DF.columns:
    print(col)

video_name
current_subject
frame_index
tone_start_frame
tone_stop_frame
condition 
competition_closeness
notes
10s_before_tone_frame
10s_after_tone_frame
subject_thorax_to_agent_thorax
nose_to_reward_port_sum
nose_to_reward_port_diff
thorax_velocity_sum
thorax_velocity_diff
to_reward_port_angle_sum
to_reward_port_angle_diff
subject_nose_to_reward_port
subject_thorax_velocity
subject_to_reward_port_angle
agent_nose_to_reward_port
agent_thorax_velocity
agent_to_reward_port_angle
closebool_subject_nose_to_reward_port
closebool_agent_nose_to_reward_port
movingbool_subject_thorax_velocity
movingbool_agent_thorax_velocity
manual_cluster_id
standard_embedding_x
standard_embedding_y
kmeans_cluster
subject_locations
agent_locations
subject_thorax
subject_nose
subject_tail_base
agent_thorax
agent_nose
agent_tail_base
session_dir
experiment
sleap_name
video_id
agent
BLA-to-LH_granger_gamma
BLA-to-LH_granger_theta
BLA-to-MD_granger_gamma
BLA-to-MD_granger_theta
BLA-to-mPFC_granger_gamma
BLA-to-mPF

In [37]:
LFP_SPECTRAL_DF["video_name"].unique()

array(['20240317_151922_long_comp_subj_3-1_and_3-3.1',
       '20240317_151922_long_comp_subj_3-1_and_3-3.2',
       '20240317_172017_long_comp_subj_4-2_and_4-3.1',
       '20240317_172017_long_comp_subj_4-2_and_4-3.2',
       '20240318_143819_long_comp_subj_3-3_and_3-4.1',
       '20240318_170933_long_comp_subj_4-3_and_4-4.1',
       '20240319_134914_long_comp_subj_3-1_and_3-4.1',
       '20240319_160457_long_comp_subj_4-2_and_4-4.1',
       '20240320_114629_long_comp_subj_5-3_and_5-4.1',
       '20240321_114851_long_comp_subj_5-2_and_5-3.1'], dtype=object)

In [38]:
merged_LFP_SPECTRAL_DF["video_name"].unique()

array(['20240317_151922_long_comp_subj_3-1_and_3-3.1',
       '20240317_172017_long_comp_subj_4-2_and_4-3.1',
       '20240318_143819_long_comp_subj_3-3_and_3-4.1',
       '20240318_170933_long_comp_subj_4-3_and_4-4.1',
       '20240319_134914_long_comp_subj_3-1_and_3-4.1',
       '20240319_160457_long_comp_subj_4-2_and_4-4.1',
       '20240320_114629_long_comp_subj_5-3_and_5-4.1',
       '20240321_114851_long_comp_subj_5-2_and_5-3.1'], dtype=object)

In [39]:
SLEAP_DF["video_name"].unique()

array(['20240317_151922_long_comp_subj_3-1_and_3-3.1',
       '20240317_172017_long_comp_subj_4-2_and_4-3.1',
       '20240318_143819_long_comp_subj_3-3_and_3-4.1',
       '20240318_170933_long_comp_subj_4-3_and_4-4.1',
       '20240319_134914_long_comp_subj_3-1_and_3-4.1',
       '20240319_160457_long_comp_subj_4-2_and_4-4.1',
       '20240320_114629_long_comp_subj_5-3_and_5-4.1',
       '20240320_171038_alone_comp_subj_4-2_and_4-3.1',
       '20240321_114851_long_comp_subj_5-2_and_5-3.1',
       '20240322_120625_alone_comp_subj_3-3_and_3-4.1',
       '20240322_160946_alone_comp_subj_4-3_and_4-4.1',
       '20240323_144517_alone_comp_subj_3-1_and_3-4.1',
       '20240323_165815_alone_comp_subj_4-2_and_4-4.1'], dtype=object)

In [None]:
# NOTE(review): unconditional stop. A top-to-bottom run ends at this cell, so none of the
# cells below execute unless this cell is skipped or removed.
raise ValueError()

## Separating each frame into its own row

In [None]:
# Columns selected alongside explode_columns but not exploded themselves: each row keeps
# its value as-is when the per-frame columns are expanded.
# NOTE(review): 'first_timestamp' does not appear in the column listings printed above
# (those listings are truncated) — confirm that merged_LFP_SPECTRAL_DF actually has it.
filter_columns = ['video_name',
 'current_subject',
 'session_dir',
 'sleap_name',
 'video_id',
 'agent',
 'cluster_timestamp',
 'first_timestamp',
 'recording']

In [None]:
# Columns that are expanded to one row per frame by DataFrame.explode.
# 'condition ' is spelled with a trailing space on purpose: that is the column's actual
# name in SLEAP_DF (see the printed column list), so "fixing" it would raise a KeyError.
explode_columns = ['frame_index',
 'tone_start_frame',
 'tone_stop_frame',
 'condition ',
 'competition_closeness',
 'notes',
 '10s_before_tone_frame',
 '10s_after_tone_frame',
 'subject_thorax_to_agent_thorax',
 'nose_to_reward_port_sum',
 'nose_to_reward_port_diff',
 'thorax_velocity_sum',
 'thorax_velocity_diff',
 'to_reward_port_angle_sum',
 'to_reward_port_angle_diff',
 'subject_nose_to_reward_port',
 'subject_thorax_velocity',
 'subject_to_reward_port_angle',
 'agent_nose_to_reward_port',
 'agent_thorax_velocity',
 'agent_to_reward_port_angle',
 'closebool_subject_nose_to_reward_port',
 'closebool_agent_nose_to_reward_port',
 'movingbool_subject_thorax_velocity',
 'movingbool_agent_thorax_velocity',
 'manual_cluster_id',
 'standard_embedding_x',
 'standard_embedding_y',
 'kmeans_cluster',
 'subject_locations',
 'agent_locations',
 'subject_thorax',
 'subject_nose',
 'subject_tail_base',
 'agent_thorax',
 'agent_nose',
 'agent_tail_base']

In [None]:
# Keep only the identifier columns and the per-frame columns. The .copy() makes an
# independent frame so later changes do not touch merged_LFP_SPECTRAL_DF.
filtered_LFP_SPECTRAL_DF = merged_LFP_SPECTRAL_DF[filter_columns + explode_columns].copy()

In [None]:
filtered_LFP_SPECTRAL_DF.head()

In [None]:
# explode_columns = [
#     "cluster_timestamp",
#     "kmeans_cluster",
#     "condition ",
#     "competition_closeness",
#     "tone_start_frame"]

In [None]:
# One row per frame: all columns in explode_columns are exploded together (pandas requires
# their list-likes to have matching lengths within a row), then the index is renumbered.
# Columns not listed in explode_columns are repeated unchanged on every resulting row.
exploded_LFP_SPECTRAL_DF = filtered_LFP_SPECTRAL_DF.explode(explode_columns).reset_index(drop=True)


In [None]:
exploded_LFP_SPECTRAL_DF.head()

# Grouping columns by shared video, subject, and tone start (i.e. grouping by trial)

In [None]:
explode_columns

In [None]:
# Keys that identify one trial: a video, a subject, and the frame at which the tone starts.
groupby_columns = ["video_name", "current_subject", "tone_start_frame"]

In [None]:
# Add the columns that are treated as trial-level values: in the aggregation that follows
# they are reduced with 'first' instead of being collected into per-frame lists.
# The list is rebuilt (not extended with `+=`) and names that are already present are
# skipped, so re-running this cell does not keep appending duplicates.
filter_columns = filter_columns + [
    col
    for col in (
        "tone_stop_frame",
        "condition ",
        "competition_closeness",
        "notes",
        "10s_before_tone_frame",
        "10s_after_tone_frame",
    )
    if col not in filter_columns
]

In [None]:
# Aggregation plan for rolling the per-frame rows up to one row per trial:
#   - per-frame columns are gathered into a list per trial;
#   - filter_columns keep the first value seen in the trial.
# The grouping keys themselves are excluded from both.
agg_dict = {
    **dict.fromkeys(
        (col for col in explode_columns if col not in groupby_columns and col not in filter_columns),
        list,
    ),
    **dict.fromkeys(
        (col for col in filter_columns if col not in groupby_columns),
        'first',
    ),
}

# Group by trial, aggregate, and turn the grouping keys back into ordinary columns.
grouped_exploded_columns = (
    exploded_LFP_SPECTRAL_DF
    .groupby(groupby_columns)
    .agg(agg_dict)
    .reset_index()
)


In [None]:
grouped_exploded_columns.head()

## Renaming the trial labels

In [None]:
grouped_exploded_columns.head()

In [None]:
# Collapse the annotated closeness labels into two coarse classes.
comp_closeness_dict = {
    # Labels in which both subjects are involved in the outcome.
    **dict.fromkeys(
        (
            'Subj 1 blocking Subj 2',
            'Subj 2 blocking Subj 1',
            'Subj 1 then Subj 2',
            'Subj 2 then Subj 1',
            'Close Call',
        ),
        "competitive",
    ),
    # Labels with a single subject, or scored after the trial.
    **dict.fromkeys(
        ('Subj 1 Only', 'Subj 2 Only', 'After trial'),
        "no_comp",
    ),
}

In [None]:
def _label_trial(row):
    """Return "win", "lose" or "tie" for one trial row, from the subject's point of view.

    "win" when the row's "condition " value equals current_subject, "lose" when it equals
    agent, and "tie" for anything else.
    """
    if row["current_subject"] == row["condition "]:
        return "win"
    if row["agent"] == row["condition "]:
        return "lose"
    return "tie"


grouped_exploded_columns["trial_label"] = grouped_exploded_columns.apply(_label_trial, axis=1)
                                                                        

In [None]:
# Replace the annotated closeness labels with the coarse classes in comp_closeness_dict.
# Series.map with a dict yields NaN for any value that is not one of its keys, and the
# column is overwritten in place, so running this cell a second time turns every entry
# into NaN (the mapped class names are not keys of the dict).
grouped_exploded_columns["competition_closeness"] = grouped_exploded_columns["competition_closeness"].map(comp_closeness_dict)

## Getting the ranges of each cluster

- Getting the index range

In [None]:
# For every trial, find the (start, end) positions of each run of consecutive frames that
# share a k-means cluster; positions are indices into that trial's kmeans_cluster list.
grouped_exploded_columns["cluster_index_ranges_dict"] = (
    grouped_exploded_columns["kmeans_cluster"].apply(find_consecutive_ranges)
)

In [None]:
grouped_exploded_columns.head()

- Calculating the times in milliseconds of each cluster frame

In [None]:
# Offset every frame timestamp by the row's first_timestamp, then floor-divide by 20.
# NOTE(review): the section text says the result is in milliseconds, which presumes
# 20 timestamp ticks per millisecond — confirm the clock rate behind the divisor.
grouped_exploded_columns["cluster_times"] = grouped_exploded_columns.apply(lambda x: (np.array(x["cluster_timestamp"]) - x["first_timestamp"]) // 20, axis=1)


- Updating the index to use cluster times and timestamps based on video frame timestamps list

In [None]:
# Translate each (start, end) index pair into the cluster_times values at those positions.
# NOTE(review): the index pairs come from the per-trial kmeans_cluster list, while
# cluster_timestamp is listed in filter_columns (not explode_columns) and is reduced with
# 'first', so cluster_times may still span the whole video. If so, trial-local indices
# would select the wrong entries — confirm the two sequences are aligned.
grouped_exploded_columns["cluster_times_ranges_dict"] = grouped_exploded_columns.apply(lambda x: update_tuples_in_dict(x["cluster_index_ranges_dict"], x["cluster_times"]), axis=1)

In [None]:
# Translate each (start, end) index pair into the cluster_timestamp values at those positions.
# NOTE(review): the index pairs come from the per-trial kmeans_cluster list, while
# cluster_timestamp is listed in filter_columns (not explode_columns) and is reduced with
# 'first', so it may still span the whole video. If so, trial-local indices would select
# the wrong timestamps — confirm the two sequences are aligned.
grouped_exploded_columns["cluster_timestamps_ranges_dict"] = grouped_exploded_columns.apply(lambda x: update_tuples_in_dict(x["cluster_index_ranges_dict"], x["cluster_timestamp"]), axis=1)

- Combining the win and loss label with the cluster

In [None]:
# Prefix every cluster key with the trial outcome, giving keys of the form "<trial_label>_<cluster>".
grouped_exploded_columns["trial_cluster_times_ranges_dict"] = grouped_exploded_columns.apply(
    lambda row: {
        f'{row["trial_label"]}_{cluster}': ranges
        for cluster, ranges in row["cluster_times_ranges_dict"].items()
    },
    axis=1,
)


In [None]:
# Prefix every cluster key with the trial outcome, giving keys of the form "<trial_label>_<cluster>".
grouped_exploded_columns["trial_cluster_timestamps_ranges_dict"] = grouped_exploded_columns.apply(
    lambda row: {
        f'{row["trial_label"]}_{cluster}': ranges
        for cluster, ranges in row["cluster_timestamps_ranges_dict"].items()
    },
    axis=1,
)


- Grouping all the rows with the same video and subject together

In [None]:
list(grouped_exploded_columns.columns)

In [None]:
explode_columns

In [None]:
# Trial-level columns (one value per trial) that are gathered into lists, next to the
# per-frame columns, when trials are grouped by video and subject.
other_explode_columns = ["tone_stop_frame",
                         "condition ",
                         "competition_closeness",
                         "notes",
                         "10s_before_tone_frame",
                         "10s_after_tone_frame",
                        'cluster_index_ranges_dict',
 'cluster_times',
 'cluster_times_ranges_dict',
 'cluster_timestamps_ranges_dict',
 'trial_cluster_times_ranges_dict',
 'trial_cluster_timestamps_ranges_dict']

In [None]:
filter_columns

In [None]:
# Build the per-column aggregation used to roll trials up to one row per (video, subject).


# Per-frame and per-trial columns are gathered into lists, one entry per trial.
agg_dict = {col: list for col in explode_columns + other_explode_columns if col not in groupby_columns and col != "tone_start_frame"}

# The remaining filter_columns keep their first value.
# NOTE(review): "tone_start_frame" is appended here, but groupby_columns (as defined in
# its own cell) also contains it, so the `not in groupby_columns` filter appears to drop
# it again and it ends up in neither aggregation — confirm whether that is intended.
agg_dict.update({col: 'first' for col in filter_columns + ["tone_start_frame"] if col not in groupby_columns and col not in other_explode_columns})

# Apply groupby and aggregation
video_grouped_exploded_columns = grouped_exploded_columns.groupby(["video_name", "current_subject"]).agg(agg_dict).reset_index()


In [None]:
video_grouped_exploded_columns.columns

In [None]:
video_grouped_exploded_columns.head()

- Combining all the dictionaries together

In [None]:
# Every column with "dict" in its name holds a list of per-trial dictionaries in each row;
# fold each list into a single dictionary per row.
for col in list(video_grouped_exploded_columns.columns):
    if "dict" not in col:
        continue
    video_grouped_exploded_columns[col] = video_grouped_exploded_columns[col].apply(combine_dicts)

In [None]:
video_grouped_exploded_columns.head()

In [None]:
# Save the per-video cluster ranges next to the notebook's other output. The path is built
# from OUTPUT_DIR (as for the merged table) instead of a second hard-coded "./proc/", so
# both outputs follow the same setting.
video_grouped_exploded_columns.to_pickle(
    os.path.join(OUTPUT_DIR, "{}_cluster_ranges.pkl".format(OUTPUT_PREFIX))
)

In [None]:
# NOTE(review): unconditional stop. A top-to-bottom run ends at this cell, so the cells
# below it execute only if this cell is skipped or removed.
raise ValueError()

In [None]:
video_grouped_exploded_columns.columns