# All oscillation analysis

Brief 1-2 sentence description of notebook.

In [1]:
# Imports of all used packages and libraries
import sys
import os
import git
import glob
from collections import defaultdict

In [2]:
git_repo = git.Repo(".", search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")

In [3]:
git_root

'/blue/npadillacoreano/ryoi360/projects/reward_comp/repos/reward_comp_ext'

In [4]:
sys.path.insert(0, os.path.join(git_root, 'src'))

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
import os
import collections
import itertools
from collections import defaultdict
from itertools import combinations

In [7]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import mannwhitneyu


In [8]:
import matplotlib
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import colorsys

In [9]:
from sklearn.metrics import confusion_matrix

In [10]:
from spectral_connectivity import Multitaper, Connectivity
import spectral_connectivity

In [11]:
import utilities.helper
import trodes.read_exported

In [12]:
FONTSIZE = 20

In [13]:
font = {'weight' : 'medium',
        'size'   : 20}

matplotlib.rc('font', **font)

## Inputs & Data

In [14]:
EPHYS_SAMPLE_RATE = 20000

Explanation of each input and where it comes from.

In [15]:
# Inputs and Required data loading
# input varaible names are in all caps snake case
# Whenever an input changes or is used for processing 
# the vairables are all lower in snake case
OUTPUT_DIR = r"./proc/" # where data is saved should always be shown in the inputs
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_PREFIX = "rce_pilot_3_alone_comp"

In [18]:
# TRIAL_LABELS_DF = pd.read_excel("/blue/npadillacoreano/ryoi360/projects/reward_comp/repos/reward_comp_ext/results/2024_06_26_sleap_clustering/data/rce_pilot_3_alone_comp_per_video_trial_labels.xlsx")
TRIALS_AND_SPECTRAL_DF = pd.read_pickle("./proc/rce_pilot_3_alone_comp_10_per_trial_spectral_bans_sleap.pkl")

In [19]:
FULL_LFP_TRACES_PKL = "{}_12_per_cluster_spectral_bans_sleap.pkl".format(OUTPUT_PREFIX)

## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Functions 

In [21]:
def combine_dicts(dicts):
    """
    Combine lists from multiple dictionaries that share the same key.

    This function takes a list of dictionaries where each dictionary's values are lists of numbers.
    It merges these lists for each corresponding key across all dictionaries, producing a single
    dictionary where each key has a combined list of all numbers from the input dictionaries.

    Parameters:
        dicts (list of dict): A list of dictionaries with values as lists of numbers.

    Returns:
        dict: A dictionary with keys from the input dictionaries and values as merged lists 
        of numbers from all corresponding input dictionary values.
    """
    combined = defaultdict(list)
    for dictionary in dicts:
        for key, value in dictionary.items():
            combined[key].extend(value)
    
    return dict(combined)


# Example usage
list_of_dicts = [
    {'a': [1, 2], 'b': [3, 4]},
    {'a': [5], 'b': [6, 7]},
    {'a': [8, 9], 'c': [10]}
]

combined_dict = combine_dicts(list_of_dicts)
print(combined_dict)

{'a': [1, 2, 5, 8, 9], 'b': [3, 4, 6, 7], 'c': [10]}


In [22]:
def find_consecutive_ranges(numbers, min_length=1):
    """
    Finds the start and end indices for consecutive ranges of each number in a list where the range meets a minimum length.

    This function iterates through a list of integers and identifies ranges where the same integer appears consecutively
    and the length of this sequence meets or exceeds the specified minimum length.

    Parameters:
        numbers (list): A list of integers to analyze for consecutive ranges.
        min_length (int): The minimum length of a range for it to be included in the results.

    Returns:
        dict: A dictionary with integers as keys and a list of tuples (start, end) as values,
              where each tuple represents the start and end indices (inclusive) of consecutive ranges
              for that integer. Only ranges that meet or exceed the minimum length are included.
    """
    ranges = {}
    n = len(numbers)
    if n == 0:
        return ranges
    
    start = 0
    current = numbers[0]

    for i in range(1, n):
        if numbers[i] != current:
            if (i - start) >= min_length:
                if current not in ranges:
                    ranges[current] = []
                ranges[current].append((start, i - 1))
            current = numbers[i]
            start = i

    # Handle the last range
    if (n - start) >= min_length:
        if current not in ranges:
            ranges[current] = []
        ranges[current].append((start, n - 1))

    return ranges

# Example usage:
numbers = [1, 1, 2, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1]
print(find_consecutive_ranges(numbers, min_length=3))

{2: [(2, 4)], 3: [(5, 8)]}


In [23]:
def update_tuples_in_dict(original_dict, reference_list):
    """
    Updates the values in the tuples within a dictionary by replacing indices with corresponding values from a reference list.
    
    This function iterates through each key-value pair in the original dictionary. Each value is expected to be a list of tuples,
    where each tuple contains indices. These indices are used to fetch corresponding values from the reference list, creating new tuples.
    
    Parameters:
        original_dict (dict): Dictionary whose values are lists of tuples. Each tuple consists of indices into the reference_list.
        reference_list (list): List of elements that are referenced by the indices in the tuples of the original_dict.
        
    Returns:
        dict: A dictionary with the same keys as original_dict but with tuples transformed to contain elements from reference_list
              based on the indices in the original tuples.
    """
    # Create a new dictionary to store the updated key-value pairs
    new_dict = {}
    for key, list_of_tuples in original_dict.items():
        # Process each tuple in the list associated with the current key
        updated_tuples = [
            tuple(reference_list[idx] for idx in tup) for tup in list_of_tuples
        ]
        new_dict[key] = updated_tuples
    
    return new_dict

# Example usage:
original_dict = {
    'a': [(0, 1), (2, 3)],
    'b': [(1, 3), (0, 2)]
}
reference_list = ['alpha', 'beta', 'gamma', 'delta']

updated_dict = update_tuples_in_dict(original_dict, reference_list)
print(updated_dict)

{'a': [('alpha', 'beta'), ('gamma', 'delta')], 'b': [('beta', 'delta'), ('alpha', 'gamma')]}


In [88]:
def find_indices_within_ranges(ranges_dict, values):
    """
    Creates a dictionary mapping keys to sorted indices of values that fall within specified ranges.
    
    Parameters:
        ranges_dict (dict): A dictionary with keys and values as lists of tuples representing ranges.
        values (list): A list of values to check against the ranges.
        
    Returns:
        dict: A dictionary where each key maps to a sorted list of indices for values within the ranges.
    """
    result_dict = {}
    for key, ranges in ranges_dict.items():
        matched_indices = []
        for index, value in enumerate(values):
            if any(start <= value <= end for start, end in ranges):
                matched_indices.append(index)
        result_dict[key] = sorted(matched_indices)
    return list(result_dict.items())

# Example usage:
ranges_dict = {
    'range1': [(1, 5), (10, 15)],
    'range2': [(0, 2), (4, 8)]
}
values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

result = find_indices_within_ranges(ranges_dict, values)
print(result)


[('range1', [1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15]), ('range2', [0, 1, 2, 4, 5, 6, 7, 8])]


## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

In [20]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names




## Renaming the trial labels

In [24]:
TRIALS_AND_SPECTRAL_DF.head()

Unnamed: 0,tone_start_frame,reward_start,reward_dispensed,tone_stop_frame,condition,competition_closeness,get_reward_frame,out_reward_frame,notes,box_1_port_entry_frames,...,trial_and_post_mPFC_gamma_phase,trial_and_post_mPFC_lfp_trace,trial_and_post_mPFC_theta_band,trial_and_post_mPFC_theta_phase,trial_and_post_vHPC_gamma_band,trial_and_post_vHPC_gamma_phase,trial_and_post_vHPC_lfp_trace,trial_and_post_vHPC_theta_band,trial_and_post_vHPC_theta_phase,trial_and_post_lfp_timestamps
0,34242.0,34322,34342.0,34442.0,4.3,After trial,,,,[34250 34265],...,"[-2.8267386, -2.5752828, -2.3241265, -2.073247...","[74.295, 59.67, 49.53, 71.174995, 98.865, 105....","[0.45963967, 0.46584538, 0.4714166, 0.47629145...","[0.0660045, 0.100497134, 0.13558412, 0.1712443...","[-0.33073074, -0.2819916, -0.21614309, -0.1376...","[-2.721211, -2.4700835, -2.2192981, -1.9688195...","[100.424995, 94.38, 89.7, 106.274994, 127.1399...","[0.5714821, 0.57240754, 0.5722418, 0.57094795,...","[0.16306445, 0.20337772, 0.24396515, 0.2848233...","[34280580, 34280600, 34280620, 34280640, 34280..."
1,34242.0,34322,34342.0,34442.0,4.3,After trial,,,,[34250 34265],...,"[-2.4468, -2.2177749, -1.9918119, -1.7689933, ...","[19.695, 43.68, 68.25, 87.16499, 101.204994, 9...","[0.6209574, 0.6194976, 0.6152875, 0.6083768, 0...","[-0.14499341, -0.07693758, -0.008636221, 0.059...","[0.005282254, 0.024448069, 0.041276596, 0.0541...","[-1.484184, -1.164493, -0.85072726, -0.5423021...","[-6.24, 8.384999, 61.815, 109.005, 125.774994,...","[0.51433784, 0.4935799, 0.47049338, 0.44517246...","[0.5733892, 0.64060175, 0.7075213, 0.7741529, ...","[34280580, 34280600, 34280620, 34280640, 34280..."
2,35241.0,35321,35341.0,35439.0,4.3,Subj 2 Only,,,,[35242 35249] [35249 35250] [35250 35271] [353...,...,"[-1.3393176, -1.0746405, -0.8121532, -0.552019...","[-42.12, -46.019997, -42.899998, -34.515, -29....","[-0.55376214, -0.54105306, -0.5277178, -0.5138...","[-2.848622, -2.809103, -2.7699904, -2.7312958,...","[0.033837054, 0.08063326, 0.120246336, 0.15044...","[-1.3934808, -1.129738, -0.86815214, -0.608863...","[-65.13, -52.065, -46.215, -45.045, -38.024998...","[-0.51730657, -0.50465643, -0.49131757, -0.477...","[-2.8382177, -2.7967052, -2.75556, -2.7147894,...","[35280600, 35280620, 35280640, 35280660, 35280..."
3,35241.0,35321,35341.0,35439.0,4.3,Subj 2 Only,,,,[35242 35249] [35249 35250] [35250 35271] [353...,...,"[1.1835573, 1.3989904, 1.6151469, 1.8322233, 2...","[-84.24, -29.64, 8.969999, -31.005, -109.395, ...","[-0.43594536, -0.40849745, -0.37993425, -0.350...","[-2.290331, -2.2391956, -2.18775, -2.1359916, ...","[0.032845914, 0.01358331, -0.008819404, -0.033...","[1.2072978, 1.4309896, 1.6569884, 1.884983, 2....","[-74.1, -20.865, 18.33, -21.644999, -101.39999...","[-0.45931047, -0.42667648, -0.3932578, -0.3591...","[-2.2182832, -2.1700776, -2.1216602, -2.073028...","[35280600, 35280620, 35280640, 35280660, 35280..."
4,36339.0,36419,36439.0,36538.0,4.3,Subj 2 blocking Subj 1,,,,[36339 36354] [36361 36365] [36410 36473] [364...,...,"[1.9263768, 2.1613147, 2.397383, 2.634575, 2.8...","[7.995, 11.7, 12.674999, 5.85, -11.8949995, -3...","[0.04354602, 0.04824548, 0.050688196, 0.050923...","[-1.434966, -1.4083124, -1.3854433, -1.3673644...","[-0.17820239, -0.24864225, -0.30498573, -0.344...","[2.0645847, 2.298156, 2.5328636, 2.768695, 3.0...","[22.814999, 32.37, 31.005, 12.674999, -14.4299...","[0.007989262, 0.015576166, 0.021102728, 0.0245...","[-1.5463593, -1.5198455, -1.4966478, -1.477523...","[36380600, 36380620, 36380640, 36380660, 36380..."


In [25]:
comp_closeness_dict = {'Subj 1 blocking Subj 2': "competitive",
'Subj 2 Only': "no_comp",
'Subj 2 blocking Subj 1': "competitive",
'Subj 1 then Subj 2': "competitive", 
'Subj 1 Only': "no_comp",
'Subj 2 then Subj 1': "competitive",
'Close Call': "competitive",
'After trial': "no_comp"}

In [29]:
TRIALS_AND_SPECTRAL_DF["current_subject"] = TRIALS_AND_SPECTRAL_DF["current_subject"].apply(lambda x: str(x).strip().lower())

In [36]:
TRIALS_AND_SPECTRAL_DF["condition "] = TRIALS_AND_SPECTRAL_DF["condition "].apply(lambda x: str(x).strip().lower())

In [37]:
TRIALS_AND_SPECTRAL_DF["trial_label"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: "win" if x["current_subject"] == x["condition "]  else ("lose" if x["agent"] == x["condition "] else "tie"), axis=1)
                                                                        

In [38]:
TRIALS_AND_SPECTRAL_DF["competition_closeness"] = TRIALS_AND_SPECTRAL_DF["competition_closeness"].map(comp_closeness_dict)

In [39]:
TRIALS_AND_SPECTRAL_DF["trial_label"]

0      lose
1       win
2      lose
3       win
4      lose
       ... 
243     win
244    lose
245     win
246     tie
247     tie
Name: trial_label, Length: 248, dtype: object

## Getting the ranges of each cluster

- Getting the index range

In [41]:
list(TRIALS_AND_SPECTRAL_DF.columns)

['tone_start_frame',
 'reward_start',
 'reward_dispensed',
 'tone_stop_frame',
 'condition ',
 'competition_closeness',
 'get_reward_frame',
 'out_reward_frame',
 'notes',
 'box_1_port_entry_frames',
 'box_2_port_entry_frames',
 'video_name',
 'tone_start_timestamp',
 'tone_stop_timestamp',
 'box_1_port_entry_timestamps',
 'box_2_port_entry_timestamps',
 'current_subject',
 'subject_tail_base',
 'agent_tail_base',
 'session_dir',
 'experiment',
 'sleap_name',
 'video_id',
 'agent',
 'all_subjects',
 'coherence_calculation_frequencies',
 'cohort',
 'first_timestamp',
 'granger_calculation_frequencies',
 'last_timestamp',
 'power_calculation_frequencies',
 'recording',
 'session_path',
 'subject',
 'tone_frames',
 'tone_timestamps',
 'video_timestamps',
 'baseline_start_timestamp',
 'post_trial_end_timestamp',
 'video_frame',
 'trial_and_post_agent_locations',
 'trial_and_post_agent_nose',
 'trial_and_post_agent_nose_to_reward_port',
 'trial_and_post_agent_thorax',
 'trial_and_post_agent

In [45]:
TRIALS_AND_SPECTRAL_DF["cluster_index_ranges_dict"] = TRIALS_AND_SPECTRAL_DF["trial_and_post_kmeans_cluster"].apply(lambda x: find_consecutive_ranges(x, min_length=20))


In [46]:
TRIALS_AND_SPECTRAL_DF["cluster_index_ranges_dict"].iloc[0]

{5: [(4, 44), (58, 228), (281, 304)], 1: [(254, 280)], 2: [(358, 381)]}

In [47]:
TRIALS_AND_SPECTRAL_DF.head()

Unnamed: 0,tone_start_frame,reward_start,reward_dispensed,tone_stop_frame,condition,competition_closeness,get_reward_frame,out_reward_frame,notes,box_1_port_entry_frames,...,trial_and_post_mPFC_theta_phase,trial_and_post_vHPC_gamma_band,trial_and_post_vHPC_gamma_phase,trial_and_post_vHPC_lfp_trace,trial_and_post_vHPC_theta_band,trial_and_post_vHPC_theta_phase,trial_and_post_lfp_timestamps,trial_label,trial_and_post_cluster_index_ranges_dict,cluster_index_ranges_dict
0,34242.0,34322,34342.0,34442.0,4.3,,,,,[34250 34265],...,"[0.0660045, 0.100497134, 0.13558412, 0.1712443...","[-0.33073074, -0.2819916, -0.21614309, -0.1376...","[-2.721211, -2.4700835, -2.2192981, -1.9688195...","[100.424995, 94.38, 89.7, 106.274994, 127.1399...","[0.5714821, 0.57240754, 0.5722418, 0.57094795,...","[0.16306445, 0.20337772, 0.24396515, 0.2848233...","[34280580, 34280600, 34280620, 34280640, 34280...",lose,"{5: [(4, 44), (58, 228), (281, 304)], 1: [(254...","{5: [(4, 44), (58, 228), (281, 304)], 1: [(254..."
1,34242.0,34322,34342.0,34442.0,4.3,,,,,[34250 34265],...,"[-0.14499341, -0.07693758, -0.008636221, 0.059...","[0.005282254, 0.024448069, 0.041276596, 0.0541...","[-1.484184, -1.164493, -0.85072726, -0.5423021...","[-6.24, 8.384999, 61.815, 109.005, 125.774994,...","[0.51433784, 0.4935799, 0.47049338, 0.44517246...","[0.5733892, 0.64060175, 0.7075213, 0.7741529, ...","[34280580, 34280600, 34280620, 34280640, 34280...",win,"{5: [(4, 44), (58, 228), (281, 304)], 1: [(254...","{5: [(4, 44), (58, 228), (281, 304)], 1: [(254..."
2,35241.0,35321,35341.0,35439.0,4.3,,,,,[35242 35249] [35249 35250] [35250 35271] [353...,...,"[-2.848622, -2.809103, -2.7699904, -2.7312958,...","[0.033837054, 0.08063326, 0.120246336, 0.15044...","[-1.3934808, -1.129738, -0.86815214, -0.608863...","[-65.13, -52.065, -46.215, -45.045, -38.024998...","[-0.51730657, -0.50465643, -0.49131757, -0.477...","[-2.8382177, -2.7967052, -2.75556, -2.7147894,...","[35280600, 35280620, 35280640, 35280660, 35280...",lose,"{5: [(10, 31)], 6: [(32, 67)], 1: [(115, 161),...","{5: [(10, 31)], 6: [(32, 67)], 1: [(115, 161),..."
3,35241.0,35321,35341.0,35439.0,4.3,,,,,[35242 35249] [35249 35250] [35250 35271] [353...,...,"[-2.290331, -2.2391956, -2.18775, -2.1359916, ...","[0.032845914, 0.01358331, -0.008819404, -0.033...","[1.2072978, 1.4309896, 1.6569884, 1.884983, 2....","[-74.1, -20.865, 18.33, -21.644999, -101.39999...","[-0.45931047, -0.42667648, -0.3932578, -0.3591...","[-2.2182832, -2.1700776, -2.1216602, -2.073028...","[35280600, 35280620, 35280640, 35280660, 35280...",win,"{5: [(10, 31)], 6: [(32, 67)], 1: [(115, 161),...","{5: [(10, 31)], 6: [(32, 67)], 1: [(115, 161),..."
4,36339.0,36419,36439.0,36538.0,4.3,,,,,[36339 36354] [36361 36365] [36410 36473] [364...,...,"[-1.434966, -1.4083124, -1.3854433, -1.3673644...","[-0.17820239, -0.24864225, -0.30498573, -0.344...","[2.0645847, 2.298156, 2.5328636, 2.768695, 3.0...","[22.814999, 32.37, 31.005, 12.674999, -14.4299...","[0.007989262, 0.015576166, 0.021102728, 0.0245...","[-1.5463593, -1.5198455, -1.4966478, -1.477523...","[36380600, 36380620, 36380640, 36380660, 36380...",lose,"{3: [(10, 29)], 1: [(108, 130), (329, 350), (3...","{3: [(10, 29)], 1: [(108, 130), (329, 350), (3..."


In [50]:
TRIALS_AND_SPECTRAL_DF["cluster_timestamp"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: x["video_timestamps"][x["trial_and_post_frame_index"]], axis=1)


- Calculating the times in milliseconds of each cluster frame

In [53]:
TRIALS_AND_SPECTRAL_DF["cluster_times"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: (np.array(x["cluster_timestamp"]) - x["first_timestamp"]) // 20, axis=1)


- Updating the index to use cluster times and timestamps based on video frame timestamps list

In [54]:
TRIALS_AND_SPECTRAL_DF["cluster_times_ranges_dict"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: update_tuples_in_dict(x["cluster_index_ranges_dict"], x["cluster_times"]), axis=1)

In [55]:
TRIALS_AND_SPECTRAL_DF["cluster_timestamps_ranges_dict"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: update_tuples_in_dict(x["cluster_index_ranges_dict"], x["cluster_timestamp"]), axis=1)

- Combining the win and loss label with the cluster

In [56]:
TRIALS_AND_SPECTRAL_DF["trial_cluster_times_ranges_dict"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: {"{}_{}".format(x["trial_label"], k): v for k, v in x["cluster_times_ranges_dict"].items()}, axis=1)


In [57]:
TRIALS_AND_SPECTRAL_DF["trial_cluster_timestamps_ranges_dict"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: {"{}_{}".format(x["trial_label"], k): v for k, v in x["cluster_timestamps_ranges_dict"].items()}, axis=1)


In [59]:
TRIALS_AND_SPECTRAL_DF["trial_cluster_timestamps_ranges_dict"].iloc[0]

{'lose_5': [(34285025, 34325215), (34339074, 34509536), (34562199, 34585759)],
 'lose_1': [(34535867, 34562199)],
 'lose_2': [(34639808, 34661982)]}

In [61]:
TRIALS_AND_SPECTRAL_DF.columns

Index(['tone_start_frame', 'reward_start', 'reward_dispensed',
       'tone_stop_frame', 'condition ', 'competition_closeness',
       'get_reward_frame', 'out_reward_frame', 'notes',
       'box_1_port_entry_frames',
       ...
       'trial_and_post_lfp_timestamps', 'trial_label',
       'trial_and_post_cluster_index_ranges_dict', 'cluster_index_ranges_dict',
       'cluster_timestamp', 'cluster_times', 'cluster_times_ranges_dict',
       'cluster_timestamps_ranges_dict', 'trial_cluster_times_ranges_dict',
       'trial_cluster_timestamps_ranges_dict'],
      dtype='object', length=212)

In [62]:
TRIALS_AND_SPECTRAL_DF["trial_and_post_coherence_timestamps"].iloc[0]

array([34290000, 34300000, 34310000, 34320000, 34330000, 34340000,
       34350000, 34360000, 34370000, 34380000, 34390000, 34400000,
       34410000, 34420000, 34430000, 34440000, 34450000, 34460000,
       34470000, 34480000, 34490000, 34500000, 34510000, 34520000,
       34530000, 34540000, 34550000, 34560000, 34570000, 34580000,
       34590000, 34600000, 34610000, 34620000, 34630000, 34640000,
       34650000, 34660000, 34670000, 34680000])

In [67]:
timestamps_col = [col for col in TRIALS_AND_SPECTRAL_DF.columns if "timestamps" in col and "trial_and_post" in col and "video" not in col]

In [66]:
timestamps_col

['trial_and_post_video_timestamps',
 'trial_and_post_coherence_timestamps',
 'trial_and_post_granger_timestamps',
 'trial_and_post_power_timestamps',
 'trial_and_post_lfp_timestamps']

In [79]:
for col in timestamps_col:
    base_col = col.replace("trial_and_post_", "")
    TRIALS_AND_SPECTRAL_DF["cluster_filtered_{}".format(base_col)] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: find_indices_within_ranges(x["trial_cluster_timestamps_ranges_dict"], x[col]), axis=1)

In [80]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF[TRIALS_AND_SPECTRAL_DF["cluster_filtered_power_timestamps"].apply(lambda x: len(x)) >= 1] 

In [81]:
power_exploded_TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.explode(["cluster_filtered_power_timestamps"])

In [82]:
power_exploded_TRIALS_AND_SPECTRAL_DF["cluster_filtered_power_timestamps"]

0      (lose_5, [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 1...
0                                 (lose_1, [25, 26, 27])
0                                 (lose_2, [35, 36, 37])
1      (win_5, [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12...
1                                  (win_1, [25, 26, 27])
                             ...                        
246    (tie_0, [15, 16, 17, 18, 19, 20, 21, 22, 23, 2...
246                                (tie_7, [30, 31, 32])
247             (tie_3, [5, 6, 7, 8, 9, 10, 37, 38, 39])
247    (tie_0, [15, 16, 17, 18, 19, 20, 21, 22, 23, 2...
247                                (tie_7, [30, 31, 32])
Name: cluster_filtered_power_timestamps, Length: 836, dtype: object

In [83]:
power_exploded_TRIALS_AND_SPECTRAL_DF["power_cluster_id"] = power_exploded_TRIALS_AND_SPECTRAL_DF["cluster_filtered_power_timestamps"].apply(lambda x: x[0])
power_exploded_TRIALS_AND_SPECTRAL_DF["power_cluster_indexes"] = power_exploded_TRIALS_AND_SPECTRAL_DF["cluster_filtered_power_timestamps"].apply(lambda x: x[1])

In [86]:
power_columns = [col for col in power_exploded_TRIALS_AND_SPECTRAL_DF if "power" in col and "trial_and_post" in col and "timestamps" not in col]

In [87]:
power_columns

['trial_and_post_BLA_RMS_filtered_power_all_frequencies_all_windows',
 'trial_and_post_BLA_RMS_filtered_power_gamma',
 'trial_and_post_BLA_RMS_filtered_power_theta',
 'trial_and_post_LH_RMS_filtered_power_all_frequencies_all_windows',
 'trial_and_post_LH_RMS_filtered_power_gamma',
 'trial_and_post_LH_RMS_filtered_power_theta',
 'trial_and_post_MD_RMS_filtered_power_all_frequencies_all_windows',
 'trial_and_post_MD_RMS_filtered_power_gamma',
 'trial_and_post_MD_RMS_filtered_power_theta',
 'trial_and_post_mPFC_RMS_filtered_power_all_frequencies_all_windows',
 'trial_and_post_mPFC_RMS_filtered_power_gamma',
 'trial_and_post_mPFC_RMS_filtered_power_theta',
 'trial_and_post_vHPC_RMS_filtered_power_all_frequencies_all_windows',
 'trial_and_post_vHPC_RMS_filtered_power_gamma',
 'trial_and_post_vHPC_RMS_filtered_power_theta']

In [89]:
for col in power_columns:
    power_exploded_TRIALS_AND_SPECTRAL_DF["cluster_{}".format(col)] = power_exploded_TRIALS_AND_SPECTRAL_DF.apply(lambda x: x[col][x["power_cluster_indexes"]], axis=1)

In [92]:
power_exploded_TRIALS_AND_SPECTRAL_DF["cluster_{}".format(col)].iloc[0].shape

(23,)

In [94]:
len(power_exploded_TRIALS_AND_SPECTRAL_DF["power_cluster_indexes"].iloc[0])

23

In [95]:
power_exploded_TRIALS_AND_SPECTRAL_DF

Unnamed: 0,tone_start_frame,reward_start,reward_dispensed,tone_stop_frame,condition,competition_closeness,get_reward_frame,out_reward_frame,notes,box_1_port_entry_frames,...,cluster_trial_and_post_LH_RMS_filtered_power_theta,cluster_trial_and_post_MD_RMS_filtered_power_all_frequencies_all_windows,cluster_trial_and_post_MD_RMS_filtered_power_gamma,cluster_trial_and_post_MD_RMS_filtered_power_theta,cluster_trial_and_post_mPFC_RMS_filtered_power_all_frequencies_all_windows,cluster_trial_and_post_mPFC_RMS_filtered_power_gamma,cluster_trial_and_post_mPFC_RMS_filtered_power_theta,cluster_trial_and_post_vHPC_RMS_filtered_power_all_frequencies_all_windows,cluster_trial_and_post_vHPC_RMS_filtered_power_gamma,cluster_trial_and_post_vHPC_RMS_filtered_power_theta
0,34242.0,34322,34342.0,34442.0,4.3,,,,,[34250 34265],...,"[0.049118653, 0.04432029, 0.022806628, 0.01454...","[[0.017404627, 0.009237435, 0.020289144, 0.037...","[0.0010392, 0.0017778483, 0.0017340889, 0.0018...","[0.049298737, 0.04430734, 0.02234565, 0.015264...","[[0.010256077, 0.005490159, 0.014048004, 0.034...","[0.0011133909, 0.0017187449, 0.0016338199, 0.0...","[0.048836093, 0.03983216, 0.018579122, 0.01443...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[nan, nan, 0.0017035144, 0.0019062511, 0.00202...","[nan, nan, 0.021297257, 0.012002589, 0.0345453..."
0,34242.0,34322,34342.0,34442.0,4.3,,,,,[34250 34265],...,"[0.03340388, 0.013828592, 0.032644738]","[[0.0011329819, 0.0029382098, 0.010730247, 0.0...","[0.00138236, 0.0015774369, 0.0015782466]","[0.033414055, 0.014103232, 0.030887991]","[[0.0007109967, 0.0025325161, 0.010118886, 0.0...","[0.0015798573, 0.0018669195, 0.001755629]","[0.034668207, 0.0144437505, 0.033530086]","[[0.0010037111, 0.002760056, 0.011625801, 0.01...","[0.0015151085, 0.0019391747, 0.0017612593]","[0.031577937, 0.012718529, 0.028678037]"
0,34242.0,34322,34342.0,34442.0,4.3,,,,,[34250 34265],...,"[0.026547624, 0.040914092, 0.024725916]","[[0.0043959557, 0.019276168, 0.028550087, 0.02...","[0.0018075058, 0.0020755674, 0.002119519]","[0.027208393, 0.04165223, 0.024425242]","[[0.0029105109, 0.017083567, 0.025493046, 0.02...","[0.0020433604, 0.0019597295, 0.002066028]","[0.025156047, 0.036479283, 0.023404984]","[[0.0037428935, 0.01909009, 0.026942024, 0.027...","[0.0019578887, 0.0018662137, 0.001895907]","[0.026043838, 0.03870262, 0.023299344]"
1,34242.0,34322,34342.0,34442.0,4.3,,,,,[34250 34265],...,"[0.043261394, 0.033036076, 0.023986526, 0.0426...","[[0.0026740688, 0.00498721, 0.016309006, 0.041...","[0.0015232281, 0.001145195, 0.0011130486, 0.00...","[0.04461408, 0.0295235, 0.02294997, 0.03491530...","[[0.0048666033, 0.015937502, 0.02933665, 0.060...","[0.0015928985, 0.0009774407, 0.00082807784, 0....","[0.044317845, 0.035353914, 0.025194542, 0.0402...","[[0.0037987193, 0.0090422025, 0.025519658, 0.0...","[0.0022985975, 0.0015938631, 0.0012393639, 0.0...","[0.0398159, 0.034608986, 0.025157357, 0.046068..."
1,34242.0,34322,34342.0,34442.0,4.3,,,,,[34250 34265],...,"[0.030990265, 0.030563287, 0.046492025]","[[2.900416e-05, 0.0003022305, 0.008950673, 0.0...","[0.0010723824, 0.0008024939, 0.00083178625]","[0.031504564, 0.0312215, 0.044477757]","[[0.00010818613, 0.00028267736, 0.009814792, 0...","[0.0011198713, 0.0007631681, 0.0009200637]","[0.030414684, 0.03258735, 0.048469886]","[[0.000108958506, 0.00035143815, 0.0077658286,...","[0.0008281053, 0.00066190725, 0.0006686406]","[0.026542457, 0.026474128, 0.03777187]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246,61174.0,61254,61274.0,61375.0,tie,,,,,[61188 61211] [61211 61214] [61214 61220] [612...,...,"[0.034779496, 0.030795025, 0.024519028, 0.0264...","[[0.007165184, 0.02350792, 0.03555188, 0.03744...","[0.0015547881, 0.0010615957, 0.0015940937, 0.0...","[0.037937496, 0.031846806, 0.023657246, 0.0258...","[[0.007405589, 0.023178458, 0.0334355, 0.03525...","[0.0018750331, 0.0011793282, 0.0017875882, 0.0...","[0.03504678, 0.030210767, 0.02331958, 0.027523...","[[0.0074433764, 0.027607517, 0.03864226, 0.039...","[0.0018224685, 0.001228588, 0.0019482669, 0.00...","[0.033014286, 0.027767176, 0.02384674, 0.02729..."
246,61174.0,61254,61274.0,61375.0,tie,,,,,[61188 61211] [61211 61214] [61214 61220] [612...,...,"[0.02897481, 0.03992848, 0.042006325]","[[0.0059307134, 0.0063216523, 0.018682986, 0.0...","[0.0014880112, 0.0014743354, 0.0012698664]","[0.030244596, 0.038799666, 0.040436637]","[[0.0048454874, 0.006485032, 0.019611292, 0.04...","[0.0015871755, 0.0014100957, 0.0012493795]","[0.026064148, 0.033288274, 0.03844673]","[[0.005925396, 0.008483903, 0.023374194, 0.046...","[0.0017502569, 0.0014982653, 0.0013362867]","[0.027269222, 0.03432478, 0.037928835]"
247,61174.0,61254,61274.0,61375.0,tie,,,,,[61188 61211] [61211 61214] [61214 61220] [612...,...,"[0.023800597, 0.026005648, 0.017076336, 0.0177...","[[0.003955681, 0.0045808423, 0.021397913, 0.02...","[0.0013840718, 0.0018056248, 0.001683403, 0.00...","[0.022839582, 0.026060395, 0.017273102, 0.0186...","[[0.005517801, 0.008576511, 0.025602443, 0.030...","[0.0014785485, 0.0019368517, 0.0018095085, 0.0...","[0.022574034, 0.026939044, 0.019214658, 0.0197...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[nan, nan, nan, nan, nan, nan, nan, nan, nan]","[nan, nan, nan, nan, nan, nan, nan, nan, nan]"
247,61174.0,61254,61274.0,61375.0,tie,,,,,[61188 61211] [61211 61214] [61214 61220] [612...,...,"[0.032219436, 0.020639228, 0.018929325, 0.0167...","[[2.7145168e-05, 0.0015520303, 0.013508321, 0....","[0.0026441943, 0.0021435444, 0.0020745182, 0.0...","[0.031723026, 0.020868437, 0.017366085, 0.0157...","[[5.9284645e-05, 0.0014917682, 0.01275937, 0.0...","[0.0026853066, 0.0024094502, 0.0022870433, 0.0...","[0.031214543, 0.02266122, 0.019571435, 0.01381...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


In [60]:
raise ValueError()

ValueError: 

- Grouping all the rows with the same video and subject together

In [55]:
list(TRIALS_AND_SPECTRAL_DF.columns)

['video_name',
 'current_subject',
 'tone_start_frame',
 'frame_index',
 'subject_thorax_to_agent_thorax',
 'nose_to_reward_port_sum',
 'nose_to_reward_port_diff',
 'thorax_velocity_sum',
 'thorax_velocity_diff',
 'to_reward_port_angle_sum',
 'to_reward_port_angle_diff',
 'subject_nose_to_reward_port',
 'subject_thorax_velocity',
 'subject_to_reward_port_angle',
 'agent_nose_to_reward_port',
 'agent_thorax_velocity',
 'agent_to_reward_port_angle',
 'closebool_subject_nose_to_reward_port',
 'closebool_agent_nose_to_reward_port',
 'movingbool_subject_thorax_velocity',
 'movingbool_agent_thorax_velocity',
 'manual_cluster_id',
 'standard_embedding_x',
 'standard_embedding_y',
 'kmeans_cluster',
 'subject_locations',
 'agent_locations',
 'subject_thorax',
 'subject_nose',
 'subject_tail_base',
 'agent_thorax',
 'agent_nose',
 'agent_tail_base',
 'session_dir',
 'sleap_name',
 'video_id',
 'agent',
 'cluster_timestamp',
 'first_timestamp',
 'recording',
 'tone_stop_frame',
 'condition ',
 '

In [56]:
explode_columns

['frame_index',
 'tone_start_frame',
 'tone_stop_frame',
 'condition ',
 'competition_closeness',
 'notes',
 '10s_before_tone_frame',
 '10s_after_tone_frame',
 'subject_thorax_to_agent_thorax',
 'nose_to_reward_port_sum',
 'nose_to_reward_port_diff',
 'thorax_velocity_sum',
 'thorax_velocity_diff',
 'to_reward_port_angle_sum',
 'to_reward_port_angle_diff',
 'subject_nose_to_reward_port',
 'subject_thorax_velocity',
 'subject_to_reward_port_angle',
 'agent_nose_to_reward_port',
 'agent_thorax_velocity',
 'agent_to_reward_port_angle',
 'closebool_subject_nose_to_reward_port',
 'closebool_agent_nose_to_reward_port',
 'movingbool_subject_thorax_velocity',
 'movingbool_agent_thorax_velocity',
 'manual_cluster_id',
 'standard_embedding_x',
 'standard_embedding_y',
 'kmeans_cluster',
 'subject_locations',
 'agent_locations',
 'subject_thorax',
 'subject_nose',
 'subject_tail_base',
 'agent_thorax',
 'agent_nose',
 'agent_tail_base']

In [57]:
other_explode_columns = ["tone_stop_frame",
                         "condition ",
                         "competition_closeness",
                         "notes",
                         "10s_before_tone_frame",
                         "10s_after_tone_frame",
                        'cluster_index_ranges_dict',
 'cluster_times',
 'cluster_times_ranges_dict',
 'cluster_timestamps_ranges_dict',
 'trial_cluster_times_ranges_dict',
 'trial_cluster_timestamps_ranges_dict']

In [58]:
filter_columns

['video_name',
 'current_subject',
 'session_dir',
 'sleap_name',
 'video_id',
 'agent',
 'cluster_timestamp',
 'first_timestamp',
 'recording',
 'tone_stop_frame',
 'condition ',
 'competition_closeness',
 'notes',
 '10s_before_tone_frame',
 '10s_after_tone_frame']

In [59]:
# Define columns to be transformed into numpy arrays


# Define aggregation dictionary
agg_dict = {col: list for col in explode_columns + other_explode_columns if col not in groupby_columns and col != "tone_start_frame"}

agg_dict.update({col: 'first' for col in filter_columns + ["tone_start_frame"] if col not in groupby_columns and col not in other_explode_columns})

# Apply groupby and aggregation
video_TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.groupby(["video_name", "current_subject"]).agg(agg_dict).reset_index()


In [60]:
video_TRIALS_AND_SPECTRAL_DF.columns

Index(['video_name', 'current_subject', 'frame_index', 'tone_stop_frame',
       'condition ', 'competition_closeness', 'notes', '10s_before_tone_frame',
       '10s_after_tone_frame', 'subject_thorax_to_agent_thorax',
       'nose_to_reward_port_sum', 'nose_to_reward_port_diff',
       'thorax_velocity_sum', 'thorax_velocity_diff',
       'to_reward_port_angle_sum', 'to_reward_port_angle_diff',
       'subject_nose_to_reward_port', 'subject_thorax_velocity',
       'subject_to_reward_port_angle', 'agent_nose_to_reward_port',
       'agent_thorax_velocity', 'agent_to_reward_port_angle',
       'closebool_subject_nose_to_reward_port',
       'closebool_agent_nose_to_reward_port',
       'movingbool_subject_thorax_velocity',
       'movingbool_agent_thorax_velocity', 'manual_cluster_id',
       'standard_embedding_x', 'standard_embedding_y', 'kmeans_cluster',
       'subject_locations', 'agent_locations', 'subject_thorax',
       'subject_nose', 'subject_tail_base', 'agent_thorax', 'agen

In [61]:
video_TRIALS_AND_SPECTRAL_DF.head()

Unnamed: 0,video_name,current_subject,frame_index,tone_stop_frame,condition,competition_closeness,notes,10s_before_tone_frame,10s_after_tone_frame,subject_thorax_to_agent_thorax,...,cluster_timestamps_ranges_dict,trial_cluster_times_ranges_dict,trial_cluster_timestamps_ranges_dict,session_dir,sleap_name,video_id,agent,cluster_timestamp,first_timestamp,recording
0,20240320_171038_alone_comp_subj_4-2_and_4-3.1,4.2,"[[34042, 34043, 34044, 34045, 34046, 34047, 34...","[34442, 35439, 36538, 37535, 39033, 40731, 430...","[4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, ...","[no_comp, no_comp, competitive, competitive, n...","[nan, nan, nan, nan, nan, nan, nan, nan, 4.2 b...","[34042, 35041, 36139, 37136, 38634, 40330, 426...","[34642, 35639, 36738, 37735, 39233, 40931, 432...","[[4.873386, 4.9368286, 4.95431, 4.9935803, 5.1...",...,"[{7: [(34079916, 34093775), (34668911, 3467168...","[{'lose_7': [(1600609, 1601302), (1630059, 163...","[{'lose_7': [(34079916, 34093775), (34668911, ...",20240320_171038_alone_comp_subj_4-2_and_4-3,20240320_171038_alone_comp_subj_4-2_and_4-3.1....,17,4.3,"[34079916, 34081302, 34082688, 34084074, 34084...",2067718,20240320_171038_alone_comp_subj_4-2_t6b6_merged
1,20240320_171038_alone_comp_subj_4-2_and_4-3.1,4.3,"[[34042, 34043, 34044, 34045, 34046, 34047, 34...","[34442, 35439, 36538, 37535, 39033, 40731, 430...","[4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, ...","[no_comp, no_comp, competitive, competitive, n...","[nan, nan, nan, nan, nan, nan, nan, nan, 4.2 b...","[34042, 35041, 36139, 37136, 38634, 40330, 426...","[34642, 35639, 36738, 37735, 39233, 40931, 432...","[[4.873386, 4.9368286, 4.95431, 4.9935803, 5.1...",...,"[{7: [(34079916, 34093775), (34668911, 3467168...","[{'win_7': [(1600609, 1601302), (1630059, 1630...","[{'win_7': [(34079916, 34093775), (34668911, 3...",20240320_171038_alone_comp_subj_4-2_and_4-3,20240320_171038_alone_comp_subj_4-2_and_4-3.1....,17,4.2,"[34079916, 34081302, 34082688, 34084074, 34084...",2067718,20240320_171038_alone_comp_subj_4-3_t5b5_merged
2,20240322_120625_alone_comp_subj_3-3_and_3-4.1,3.3,"[[34069, 34070, 34071, 34072, 34073, 34074, 34...","[34468, 35465, 36564, 37562, 39059, 40756, 430...","[3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, ...","[competitive, no_comp, competitive, competitiv...","[nan, 3.4 got reward at 35432, nan, nan, nan, ...","[34069, 35066, 36165, 37162, 38660, 40356, 426...","[34668, 35665, 36764, 37762, 39259, 40956, 432...","[[8.018456, 7.997978, 7.9187427, 7.8369646, 7....",...,"[{3: [(34103476, 34103476), (34169998, 3420880...","[{'tie_3': [(1524248, 1524248), (1527574, 1529...","[{'tie_3': [(34103476, 34103476), (34169998, 3...",20240322_120625_alone_comp_subj_3-3_and_3-4,20240322_120625_alone_comp_subj_3-3_and_3-4.1....,18,3.4,"[34103476, 34104862, 34106248, 34106248, 34107...",3618506,20240322_120625_alone_comp_subj_3-3_t6b6_merged
3,20240322_120625_alone_comp_subj_3-3_and_3-4.1,3.4,"[[34069, 34070, 34071, 34072, 34073, 34074, 34...","[34468, 35465, 36564, 37562, 39059, 40756, 430...","[3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, ...","[competitive, no_comp, competitive, competitiv...","[nan, 3.4 got reward at 35432, nan, nan, nan, ...","[34069, 35066, 36165, 37162, 38660, 40356, 426...","[34668, 35665, 36764, 37762, 39259, 40956, 432...","[[8.018456, 7.997978, 7.9187427, 7.8369646, 7....",...,"[{3: [(34103476, 34103476), (34169998, 3420880...","[{'tie_3': [(1524248, 1524248), (1527574, 1529...","[{'tie_3': [(34103476, 34103476), (34169998, 3...",20240322_120625_alone_comp_subj_3-3_and_3-4,20240322_120625_alone_comp_subj_3-3_and_3-4.1....,18,3.3,"[34103476, 34104862, 34106248, 34106248, 34107...",3618506,20240322_120625_alone_comp_subj_3-4_t5b5_merged
4,20240322_160946_alone_comp_subj_4-3_and_4-4.1,4.3,"[[34063, 34064, 34065, 34066, 34067, 34068, 34...","[34462, 35461, 36558, 37557, 39054, 40751, 430...","[4.3, 4.3, 4.3, 4.3, 4.3, 4.4, 4.3, 4.3, 4.3, ...","[competitive, no_comp, competitive, competitiv...","[4.3 got reward at 34433, 4.3 got reward at 35...","[34063, 35061, 36159, 37158, 38654, 40352, 426...","[34662, 35661, 36758, 37757, 39254, 40951, 432...","[[2.1562233, 2.2290666, 2.3999894, 2.5436177, ...",...,"[{7: [(34096547, 34106248), (34109019, 3412980...","[{'tie_7': [(1438255, 1438740), (1438878, 1439...","[{'tie_7': [(34096547, 34106248), (34109019, 3...",20240322_160946_alone_comp_subj_4-3_and_4-4,20240322_160946_alone_comp_subj_4-3_and_4-4.1....,19,4.4,"[34096547, 34097933, 34098977, 34099318, 34100...",5331441,20240322_160946_alone_comp_subj_4-3_t6b6_merged


- Combining all the dictionaries together

In [62]:
for col in [_ for _ in video_TRIALS_AND_SPECTRAL_DF if "dict" in _]:
    video_TRIALS_AND_SPECTRAL_DF[col] = video_TRIALS_AND_SPECTRAL_DF[col].apply(lambda x: combine_dicts(x))

In [63]:
video_TRIALS_AND_SPECTRAL_DF.head()

Unnamed: 0,video_name,current_subject,frame_index,tone_stop_frame,condition,competition_closeness,notes,10s_before_tone_frame,10s_after_tone_frame,subject_thorax_to_agent_thorax,...,cluster_timestamps_ranges_dict,trial_cluster_times_ranges_dict,trial_cluster_timestamps_ranges_dict,session_dir,sleap_name,video_id,agent,cluster_timestamp,first_timestamp,recording
0,20240320_171038_alone_comp_subj_4-2_and_4-3.1,4.2,"[[34042, 34043, 34044, 34045, 34046, 34047, 34...","[34442, 35439, 36538, 37535, 39033, 40731, 430...","[4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, ...","[no_comp, no_comp, competitive, competitive, n...","[nan, nan, nan, nan, nan, nan, nan, nan, 4.2 b...","[34042, 35041, 36139, 37136, 38634, 40330, 426...","[34642, 35639, 36738, 37735, 39233, 40931, 432...","[[4.873386, 4.9368286, 4.95431, 4.9935803, 5.1...",...,"{7: [(34079916, 34093775), (34668911, 34671683...","{'lose_7': [(1600609, 1601302), (1630059, 1630...","{'lose_7': [(34079916, 34093775), (34668911, 3...",20240320_171038_alone_comp_subj_4-2_and_4-3,20240320_171038_alone_comp_subj_4-2_and_4-3.1....,17,4.3,"[34079916, 34081302, 34082688, 34084074, 34084...",2067718,20240320_171038_alone_comp_subj_4-2_t6b6_merged
1,20240320_171038_alone_comp_subj_4-2_and_4-3.1,4.3,"[[34042, 34043, 34044, 34045, 34046, 34047, 34...","[34442, 35439, 36538, 37535, 39033, 40731, 430...","[4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, 4.3, ...","[no_comp, no_comp, competitive, competitive, n...","[nan, nan, nan, nan, nan, nan, nan, nan, 4.2 b...","[34042, 35041, 36139, 37136, 38634, 40330, 426...","[34642, 35639, 36738, 37735, 39233, 40931, 432...","[[4.873386, 4.9368286, 4.95431, 4.9935803, 5.1...",...,"{7: [(34079916, 34093775), (34668911, 34671683...","{'win_7': [(1600609, 1601302), (1630059, 16301...","{'win_7': [(34079916, 34093775), (34668911, 34...",20240320_171038_alone_comp_subj_4-2_and_4-3,20240320_171038_alone_comp_subj_4-2_and_4-3.1....,17,4.2,"[34079916, 34081302, 34082688, 34084074, 34084...",2067718,20240320_171038_alone_comp_subj_4-3_t5b5_merged
2,20240322_120625_alone_comp_subj_3-3_and_3-4.1,3.3,"[[34069, 34070, 34071, 34072, 34073, 34074, 34...","[34468, 35465, 36564, 37562, 39059, 40756, 430...","[3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, ...","[competitive, no_comp, competitive, competitiv...","[nan, 3.4 got reward at 35432, nan, nan, nan, ...","[34069, 35066, 36165, 37162, 38660, 40356, 426...","[34668, 35665, 36764, 37762, 39259, 40956, 432...","[[8.018456, 7.997978, 7.9187427, 7.8369646, 7....",...,"{3: [(34103476, 34103476), (34169998, 34208802...","{'tie_3': [(1524248, 1524248), (1527574, 15295...","{'tie_3': [(34103476, 34103476), (34169998, 34...",20240322_120625_alone_comp_subj_3-3_and_3-4,20240322_120625_alone_comp_subj_3-3_and_3-4.1....,18,3.4,"[34103476, 34104862, 34106248, 34106248, 34107...",3618506,20240322_120625_alone_comp_subj_3-3_t6b6_merged
3,20240322_120625_alone_comp_subj_3-3_and_3-4.1,3.4,"[[34069, 34070, 34071, 34072, 34073, 34074, 34...","[34468, 35465, 36564, 37562, 39059, 40756, 430...","[3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, ...","[competitive, no_comp, competitive, competitiv...","[nan, 3.4 got reward at 35432, nan, nan, nan, ...","[34069, 35066, 36165, 37162, 38660, 40356, 426...","[34668, 35665, 36764, 37762, 39259, 40956, 432...","[[8.018456, 7.997978, 7.9187427, 7.8369646, 7....",...,"{3: [(34103476, 34103476), (34169998, 34208802...","{'tie_3': [(1524248, 1524248), (1527574, 15295...","{'tie_3': [(34103476, 34103476), (34169998, 34...",20240322_120625_alone_comp_subj_3-3_and_3-4,20240322_120625_alone_comp_subj_3-3_and_3-4.1....,18,3.3,"[34103476, 34104862, 34106248, 34106248, 34107...",3618506,20240322_120625_alone_comp_subj_3-4_t5b5_merged
4,20240322_160946_alone_comp_subj_4-3_and_4-4.1,4.3,"[[34063, 34064, 34065, 34066, 34067, 34068, 34...","[34462, 35461, 36558, 37557, 39054, 40751, 430...","[4.3, 4.3, 4.3, 4.3, 4.3, 4.4, 4.3, 4.3, 4.3, ...","[competitive, no_comp, competitive, competitiv...","[4.3 got reward at 34433, 4.3 got reward at 35...","[34063, 35061, 36159, 37158, 38654, 40352, 426...","[34662, 35661, 36758, 37757, 39254, 40951, 432...","[[2.1562233, 2.2290666, 2.3999894, 2.5436177, ...",...,"{7: [(34096547, 34106248), (34109019, 34129808...","{'tie_7': [(1438255, 1438740), (1438878, 14399...","{'tie_7': [(34096547, 34106248), (34109019, 34...",20240322_160946_alone_comp_subj_4-3_and_4-4,20240322_160946_alone_comp_subj_4-3_and_4-4.1....,19,4.4,"[34096547, 34097933, 34098977, 34099318, 34100...",5331441,20240322_160946_alone_comp_subj_4-3_t6b6_merged


In [64]:
video_TRIALS_AND_SPECTRAL_DF.to_pickle("./proc/{}_cluster_ranges.pkl".format(OUTPUT_PREFIX))

# Filtering out the SLEAP posed for during trials

In [None]:
raise ValueError()

In [None]:
TRIALS_AND_SPECTRAL_DF

In [None]:
TRIALS_AND_SPECTRAL_DF["video_frame"] = TRIALS_AND_SPECTRAL_DF["video_timestamps"].apply(lambda x: np.array(list(range(len(x)))) + 1)

In [None]:
TRIALS_AND_SPECTRAL_DF["video_frame"].head().apply(lambda x: x.shape)

In [None]:
['subject_thorax_to_agent_thorax',
 'nose_to_reward_port_sum',
 'nose_to_reward_port_diff',
 'thorax_velocity_sum',
 'thorax_velocity_diff',
 'to_reward_port_angle_sum',
 'to_reward_port_angle_diff',
 'subject_nose_to_reward_port',
 'subject_thorax_velocity',
 'subject_to_reward_port_angle',
 'agent_nose_to_reward_port',
 'agent_thorax_velocity',
 'agent_to_reward_port_angle',
 'closebool_subject_nose_to_reward_port',
 'closebool_agent_nose_to_reward_port',
 'movingbool_subject_thorax_velocity',
 'movingbool_agent_thorax_velocity',
 'manual_cluster_id',
 'standard_embedding_x',
 'standard_embedding_y',
 'kmeans_cluster',
 'subject_locations',
 'agent_locations',
 'subject_thorax',
 'subject_nose',
 'subject_tail_base',
 'agent_thorax',
 'agent_nose',
 'agent_tail_base']

In [None]:
# sleap_columns = [col for col in TRIALS_AND_SPECTRAL_DF.columns if "locations" in col or "velocity" in col or "to_reward_port" in col or "video_frame" in col]

In [None]:
sleap_columns = [col for col in TRIALS_AND_SPECTRAL_DF.columns if ("thorax" in col or "nose" in col or "reward_port" in col or "standard_embedding" in col or "cluster" in col or "frame_index" in col or "locations" in col or "tailbase" in col) and "timestamp" not in col]


In [None]:
sleap_columns

In [None]:
for col in sleap_columns:
    print(col)
    print(TRIALS_AND_SPECTRAL_DF[col].apply(lambda x: x.shape).iloc[0])

In [None]:
for col in sorted(sleap_columns):
    updated_item_col = "trial_and_post_{}".format(col)
    print(updated_item_col)
    updated_timestamp_col = "trial_and_post_video_timestamps"
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["cluster_timestamp"], items=x[col])[1], axis=1)

TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["cluster_timestamp"], items=x[col])[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF.head()

In [None]:
TRIALS_AND_SPECTRAL_DF["trial_and_post_frame_index"].iloc[0].shape

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=sleap_columns + ["cluster_timestamp"], errors="ignore")

In [None]:
for col in TRIALS_AND_SPECTRAL_DF.columns:
    print(col)

- Filtering coherence

In [None]:
coherence_columns = [col for col in TRIALS_AND_SPECTRAL_DF.columns if "coherence" in col and "timestamps" not in col and "calculation" not in col]

In [None]:
coherence_columns

In [None]:
for col in sorted(coherence_columns):
    updated_item_col = "trial_and_post_{}".format(col)
    print(updated_item_col)
    updated_timestamp_col = "trial_and_post_coherence_timestamps"
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["coherence_timestamps"], items=x[col])[1], axis=1)

TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["coherence_timestamps"], items=x[col])[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=coherence_columns + ["coherence_timestamps"], errors="ignore")

- Filtering Grangers

In [None]:
granger_columns = [col for col in TRIALS_AND_SPECTRAL_DF.columns if "granger" in col and "timestamps" not in col and "calculation" not in col]

In [None]:
granger_columns

In [None]:
for col in sorted(granger_columns):
    updated_item_col = "trial_and_post_{}".format(col)
    print(updated_item_col)
    updated_timestamp_col = "trial_and_post_granger_timestamps"
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["granger_timestamps"], items=x[col])[1], axis=1)

TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["granger_timestamps"], items=x[col])[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=granger_columns + ["granger_timestamps"], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

- Filtering power

In [None]:
TRIALS_AND_SPECTRAL_DF.head()

In [None]:
power_columns = [col for col in TRIALS_AND_SPECTRAL_DF.columns if "power" in col and "timestamps" not in col and "calculation" not in col]

In [None]:
power_columns

In [None]:
for col in sorted(power_columns):
    updated_item_col = "trial_and_post_{}".format(col)
    print(updated_item_col)
    updated_timestamp_col = "trial_and_post_power_timestamps"
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["power_timestamps"], items=x[col])[1], axis=1)

TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["power_timestamps"], items=x[col])[0], axis=1)


In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=power_columns + ["power_timestamps"], errors="ignore")

In [None]:
for col in TRIALS_AND_SPECTRAL_DF:
    print(col)

# Filtering out phase

In [None]:
lfp_columns = [col for col in TRIALS_AND_SPECTRAL_DF.columns if "trace" in col and "timestamps" not in col and "calculation" not in col]

In [None]:
band_columns = [col for col in TRIALS_AND_SPECTRAL_DF.columns if "band" in col and "timestamps" not in col and "calculation" not in col]

In [None]:
phase_columns = [col for col in TRIALS_AND_SPECTRAL_DF.columns if "phase" in col and "timestamps" not in col and "calculation" not in col]

In [None]:
phase_columns = phase_columns + band_columns +lfp_columns

In [None]:
phase_columns

In [None]:
for col in sorted(phase_columns):
    updated_item_col = "trial_and_post_{}".format(col)
    print(updated_item_col)
    updated_timestamp_col = "trial_and_post_lfp_timestamps"
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["lfp_timestamps"], items=x[col])[1], axis=1)

TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["post_trial_end_timestamp"], timestamps=x["lfp_timestamps"], items=x[col])[0], axis=1)


In [None]:
TRIALS_AND_SPECTRAL_DF["trial_and_post_lfp_timestamps"]

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=phase_columns + ["lfp_timestamps"], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

In [None]:
for col in TRIALS_AND_SPECTRAL_DF:
    print(col)

In [None]:
TRIALS_AND_SPECTRAL_DF["trial_and_post_kmeans_cluster"].iloc[0].shape

In [None]:
raise ValueError()

# Filtering for rows that are in the video

In [None]:
TRIALS_AND_SPECTRAL_DF["in_video"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: x["start_frame"] <= x["trial_video_frame"][0] <= x["stop_frame"], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF[TRIALS_AND_SPECTRAL_DF["in_video"]].reset_index()

In [None]:
TRIALS_AND_SPECTRAL_DF

In [None]:
TRIALS_AND_SPECTRAL_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

In [None]:
raise ValueError()

# Filtering out spikes

In [None]:
raise ValueError()

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_spike_times"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: filter_spike_times(x["spike_times"], start=x["tone_start_timestamp"], stop=x["baseline_stop_timestamp"]).astype(int), axis=1)


In [None]:
TRIALS_AND_SPECTRAL_DF["trial_spike_times"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: filter_spike_times(x["spike_times"], start=x["tone_start_timestamp"], stop=x["tone_stop_timestamp"]).astype(int), axis=1)


In [None]:
TRIALS_AND_SPECTRAL_DF["tone_start_timestamp"].head()

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_stop_timestamp"].head()

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_spike_times"].iloc[0]

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_neuron_average_fr"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["baseline_stop_timestamp"], timestamps=x["neuron_average_timestamps"], items=x["neuron_average_fr"].T)[1], axis=1)
TRIALS_AND_SPECTRAL_DF["baseline_neuron_average_timestamp"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["baseline_stop_timestamp"], timestamps=x["neuron_average_timestamps"], items=x["neuron_average_fr"].T)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF["trial_neuron_average_fr"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["tone_stop_timestamp"], timestamps=x["neuron_average_timestamps"], items=x["neuron_average_fr"].T)[1].T, axis=1)
TRIALS_AND_SPECTRAL_DF["trial_neuron_average_timestamp"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: utilities.helper.filter_by_timestamp_range(start=x["tone_start_timestamp"], stop=x["tone_stop_timestamp"], timestamps=x["neuron_average_timestamps"], items=x["neuron_average_fr"].T)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=["spike_clusters", "spike_times", "neuron_average_fr", "neuron_average_timestamps",], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF["trial_neuron_average_fr"].iloc[0].shape

In [None]:
TRIALS_AND_SPECTRAL_DF["trial_neuron_average_timestamp"].iloc[0].shape