# All oscillation analysis

Brief 1-2 sentence description of notebook.

In [1]:
# Imports of all used packages and libraries
import sys
import os
import git
import glob
from collections import defaultdict

In [2]:
# Find the enclosing git repository by searching upward from the current directory.
git_repo = git.Repo(".", search_parent_directories=True)
# Path of the repository's top-level directory, as reported by `git rev-parse --show-toplevel`.
git_root = git_repo.git.rev_parse("--show-toplevel")

In [3]:
git_root

'/blue/npadillacoreano/ryoi360/projects/reward_comp/repos/reward_competition_extention'

In [4]:
# Put <repo root>/src first on the import path.
# Presumably this is what lets the project-local `utilities` and `trodes` imports below resolve — confirm.
sys.path.insert(0, os.path.join(git_root, 'src'))

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
import os
import collections
import itertools
from collections import defaultdict
from itertools import combinations

In [7]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import mannwhitneyu


In [8]:
import matplotlib
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
import colorsys

In [9]:
from sklearn.metrics import confusion_matrix

In [10]:
from spectral_connectivity import Multitaper, Connectivity
import spectral_connectivity

In [11]:
import utilities.helper
import trodes.read_exported

In [12]:
# Font size constant (same value as the 'size' entry passed to matplotlib.rc in the next cell).
FONTSIZE = 20

In [13]:
# Global matplotlib font settings. The size is taken from FONTSIZE (defined in the
# previous cell) instead of repeating the literal, so the two values cannot drift apart.
font = {'weight': 'medium',
        'size': FONTSIZE}

matplotlib.rc('font', **font)

## Inputs & Data

Explanation of each input and where it comes from.

In [14]:
 # Collapses the raw `competition_closeness` labels of the trial sheet into two classes:
 # "competitive" or "no_comp". Applied with Series.map further down, so any label that is
 # not a key of this dict becomes NaN.
 COMP_CLOSENESS_DICT = {'Subj 1 blocking Subj 2': "competitive",
'Subj 2 Only': "no_comp",
'Subj 2 blocking Subj 1': "competitive",
'Subj 1 then Subj 2': "competitive", 
'Subj 1 Only': "no_comp",
'Subj 2 then Subj 1': "competitive",
'Close Call': "competitive",
'After trial': "no_comp"}

In [15]:
# Inputs and required data loading.
# Input variable names are in all-caps snake case.
# Whenever an input is changed or used for processing,
# the derived variables are in all-lowercase snake case.
OUTPUT_DIR = r"./proc/" # where data is saved; should always be shown in the inputs
# Create the output directory if it does not exist yet (no error when it already does).
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_PREFIX = "rce_pilot_2"

In [16]:
# Trial label sheet; read from the same ./proc/ directory that OUTPUT_DIR points to.
# Judging by the columns used below, it holds one row per trial and subject — confirm.
TRIAL_LABELS_DF = pd.read_excel("./proc/rce2_finalized_trial_labels.xlsx")

In [17]:
# Pre-computed table whose columns (168 in the listing below) include LFP band/phase/power
# traces and SLEAP-derived locations and velocities.
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
LFP_SPECTRAL_DF = pd.read_pickle("./proc/rce_pilot_2_04_spectral_and_sleap.pkl")

In [18]:
# Remove the ".videoTimeStamps.cameraHWSync" suffix from every video name.
# `str.strip(chars)` treats its argument as a *set of characters* to trim from both ends,
# so it only produced the right result because the names happen to start and end with
# digits. Cut off the exact suffix instead; names without the suffix are left untouched,
# which also makes the cell safe to re-run.
VIDEO_TIMESTAMP_SUFFIX = ".videoTimeStamps.cameraHWSync"
LFP_SPECTRAL_DF["video_name"] = LFP_SPECTRAL_DF["video_name"].apply(
    lambda name: name[:-len(VIDEO_TIMESTAMP_SUFFIX)] if name.endswith(VIDEO_TIMESTAMP_SUFFIX) else name
)
# Subject ids are compared as strings when merging with the trial labels.
LFP_SPECTRAL_DF["current_subject"] = LFP_SPECTRAL_DF["current_subject"].astype(str)

In [19]:
# Cast subject ids to str so they match LFP_SPECTRAL_DF["current_subject"] in the merge below.
TRIAL_LABELS_DF["current_subject"] = TRIAL_LABELS_DF["current_subject"].astype(str)
# Replace the raw closeness labels by "competitive"/"no_comp". Values that are not keys of
# COMP_CLOSENESS_DICT (including missing values) become NaN.
# NOTE: this overwrites the column in place, so running the cell a second time maps the
# already-converted values to NaN; reload TRIAL_LABELS_DF before re-running.
TRIAL_LABELS_DF["competition_closeness"] = TRIAL_LABELS_DF["competition_closeness"].map(COMP_CLOSENESS_DICT)

In [20]:
LFP_SPECTRAL_DF["video_name"].unique()

array(['20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.2',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.1',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.2',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.1',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.3',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.2',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.1',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.2',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.1',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.3',
       '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.1',
       '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.2',
       '20230619_115321_standard_comp_

## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Functions 

In [21]:
import numpy as np

def mask_slices(array_2d, slice_index):
    """
    Keeps the entries inside a slice of the first axis and sets everything else to NaN.

    The slice is applied along the first axis only, so despite the parameter name the
    input may be 1D, 2D or higher-dimensional.

    Parameters:
    - array_2d (np.ndarray): The input array to mask. It is never modified; a masked copy
                             is returned. Integer and boolean arrays are promoted to float
                             because they cannot hold NaN.
    - slice_index (tuple): A tuple of two integers (start, end). Entries in `start:end`
                           along the first axis are kept (normal Python slicing rules, end
                           exclusive); all other entries are set to NaN.

    Returns:
    - np.ndarray: A copy of the input with entries outside the specified slice set to NaN.
    - float: np.nan if the input cannot be masked (for example when it is itself a NaN
             placeholder instead of an array). A warning is emitted in that case.

    Raises:
    - ValueError: If `slice_index` is not a tuple or does not contain exactly two elements.
    """
    if not isinstance(slice_index, tuple) or len(slice_index) != 2:
        raise ValueError("slice_index must be a tuple of two integers (start, end).")

    try:
        result = array_2d.copy()

        # NaN cannot be stored in integer/boolean arrays; without this promotion the
        # assignment below fails and the whole array would be replaced by a scalar NaN.
        if isinstance(result, np.ndarray) and result.dtype.kind in "biu":
            result = result.astype(float)

        # True marks the entries to blank out; the requested slice is switched back to False.
        mask_2d = np.ones(result.shape, dtype=bool)
        mask_2d[slice_index[0]:slice_index[1]] = False

        # Set values outside the defined slice to NaN
        result[mask_2d] = np.nan
        return result

    except IndexError as e:
        warnings.warn(f"Slice index out of bounds: {e}")
        return np.nan
    except Exception as e:
        # Deliberate best effort: inputs that are not arrays (e.g. NaN placeholders in a
        # DataFrame column) yield NaN instead of aborting a row-wise apply.
        warnings.warn(f"An error occurred while masking the array: {e}")
        return np.nan


In [22]:
def overlay_arrays(array1, array2):
    """
    Fills the NaN gaps of one array with the values of another array of the same shape.

    Wherever `array1` holds a non-NaN value that value wins; `array2` only contributes at
    positions where `array1` is NaN and `array2` is not. Neither input is modified.

    Parameters:
    - array1 (np.ndarray): The primary array; its non-NaN values always take precedence.
    - array2 (np.ndarray): The secondary array, consulted only where array1 is NaN.

    Returns:
    - np.ndarray: A copy of array1 whose NaN positions are filled from array2 where possible.

    Raises:
    - ValueError: If `array1` and `array2` do not have the same shape.
    """
    if array1.shape != array2.shape:
        raise ValueError("Both arrays must have the same shape.")

    # Positions that are empty in the primary array but populated in the secondary one.
    gap_in_primary = np.isnan(array1)
    value_in_secondary = np.logical_not(np.isnan(array2))
    fill_positions = np.logical_and(gap_in_primary, value_in_secondary)

    # Work on a copy so the caller's primary array stays untouched.
    combined = np.copy(array1)
    combined[fill_positions] = array2[fill_positions]
    return combined

In [23]:
def extract_start_stop_elements(array, start_index=0, stop_index=-1):
    """
    Returns the pair of elements found at a start position and a stop position.

    Parameters:
    - array (list or array-like): The sequence to read from.
    - start_index (int): Position of the first element to return. Default is 0. It is used
                         as given, without any range check.
    - stop_index (int): Position of the second element to return. Default is -1 (last element).

    Returns:
    - tuple: (element at start_index, element at stop_index).

    A stop_index that is negative or not smaller than the length of the array is replaced
    by -1, i.e. the last element of the array is returned for it.
    """
    length = len(array)

    # Any stop position outside 0 .. length-1 falls back to the last element.
    falls_outside = stop_index >= length or stop_index < 0
    stop_position = -1 if falls_outside else stop_index

    first_element = array[start_index]
    last_element = array[stop_position]
    return first_element, last_element

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

In [24]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names




In [25]:
LFP_SPECTRAL_DF.columns

Index(['cohort', 'session_dir', 'tone_frames', 'box_1_port_entry_frames',
       'box_2_port_entry_frames', 'video_name', 'session_path', 'recording',
       'current_subject', 'subject',
       ...
       'reward_port', 'box_bottom_left', 'box_bottom_right', 'agent',
       'subject_locations', 'agent_locations', 'subject_thorax_velocity',
       'agent_thorax_velocity', 'subject_thorax_to_reward_port',
       'agent_thorax_to_reward_port'],
      dtype='object', length=168)

In [26]:
TRIAL_LABELS_DF.columns

Index(['tracked_subject', 'box_number', 'sleap_name', 'video_name',
       'current_subject', 'tone_start_frame', 'reward_start_frame',
       'tone_stop_frame', 'condition', 'competition_closeness', 'notes',
       'experiment', 'session_dir', 'all_subjects', 'tone_start_timestamp',
       'tone_stop_timestamp', 'trial_label'],
      dtype='object')

# Editing the dataframes

In [27]:
LFP_SPECTRAL_DF.head()

Unnamed: 0,cohort,session_dir,tone_frames,box_1_port_entry_frames,box_2_port_entry_frames,video_name,session_path,recording,current_subject,subject,...,reward_port,box_bottom_left,box_bottom_right,agent,subject_locations,agent_locations,subject_thorax_velocity,agent_thorax_velocity,subject_thorax_to_reward_port,agent_thorax_to_reward_port
0,2,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.1,1.1,...,"[45.23441700698972, 35.83828707637651]","[30.458913657178506, 34.21867013031604]","[59.89132415244771, 34.13310301149612]",1.2,"[[[46.68890098287189, 11.495085446056166], [43...","[[[34.36404036584844, 35.615553316175784], [34...","[1.8125, 1.707, 1.6045, 1.506, 1.409, 1.315, 1...","[14.336, 12.875, 11.47, 10.11, 8.81, 7.57, 6.3...","[26.45, 26.16, 26.1, 25.77, 25.72, 25.78, 26.1...","[6.11, 6.06, 6.125, 6.566, 6.605, 6.676, 7.13,..."
1,2,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 930], [930, 948...","[[32974, 32976], [33201, 33207], [33208, 33211...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.1,1.1,...,"[48.01956210909252, 37.264554576109816]","[33.1534452699471, 35.61101869926864]","[62.60288905131385, 35.71974322144712]",,"[[[27.67316905433137, 31.06298795228111], [27....",,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",,"[21.36, 21.36, 21.36, 21.36, 21.36, 21.36, 21....",
2,2,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.2,1.2,...,"[45.23441700698972, 35.83828707637651]","[30.458913657178506, 34.21867013031604]","[59.89132415244771, 34.13310301149612]",,"[[[34.36404036584844, 35.615553316175784], [34...",,"[55.4, 52.03, 48.72, 45.53, 42.44, 39.44, 36.5...",,"[6.11, 6.06, 6.125, 6.566, 6.605, 6.676, 7.13,...",
3,2,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.2,1.2,...,"[45.23441700698972, 35.83828707637651]","[30.458913657178506, 34.21867013031604]","[59.89132415244771, 34.13310301149612]",1.1,"[[[34.36404036584844, 35.615553316175784], [34...","[[[46.68890098287189, 11.495085446056166], [43...","[14.336, 12.875, 11.47, 10.11, 8.81, 7.57, 6.3...","[1.8125, 1.707, 1.6045, 1.506, 1.409, 1.315, 1...","[6.11, 6.06, 6.125, 6.566, 6.605, 6.676, 7.13,...","[26.45, 26.16, 26.1, 25.77, 25.72, 25.78, 26.1..."
4,2,20230613_105657_standard_comp_to_training_D2_s...,"[[916, 1117], [3312, 3513], [5608, 5808], [740...","[[49, 67], [70, 79], [360, 366], [460, 469], [...","[[33601, 33798], [34108, 34165], [34166, 34179...",20230613_105657_standard_comp_to_training_D2_s...,/scratch/back_up/reward_competition_extention/...,20230613_105657_standard_comp_to_training_D2_s...,1.1,1.1,...,"[45.31189572048577, 36.38317068198775]","[30.490085836306232, 34.91627159792731]","[60.10744182471764, 34.262899786598894]",,"[[[46.71964110607613, 36.493379374373234], [44...",,"[5.8, 5.207, 4.64, 4.1, 3.596, 3.127, 2.7, 2.3...",,"[4.633, 4.09, 4.36, 4.656, 4.65, 4.383, 4.35, ...",


In [28]:
LFP_SPECTRAL_DF["video_name"].unique()

array(['20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.1',
       '20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.2',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.1',
       '20230613_105657_standard_comp_to_training_D2_subj_1-1_and_1-4.2',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.1',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.3',
       '20230614_114041_standard_comp_to_training_D3_subj_1-1_and_1-2.2',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.1',
       '20230616_111904_standard_comp_to_training_D4_subj_1-4_and_1-2.2',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.1',
       '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.3',
       '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.1',
       '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.2',
       '20230619_115321_standard_comp_

In [29]:
LFP_SPECTRAL_DF["tracked_subject"]

0     [1.1, 1.2]
1          [1.1]
2          [1.2]
3     [1.1, 1.2]
4          [1.1]
5     [1.1, 1.4]
6     [1.1, 1.4]
7          [1.4]
8          [1.1]
9     [1.1, 1.2]
10         [1.1]
11    [1.1, 1.2]
12         [1.2]
13    [1.2, 1.4]
14         [1.2]
15         [1.4]
16    [1.2, 1.4]
17         [1.1]
18    [1.1, 1.2]
19    [1.1, 1.2]
20         [1.2]
21    [1.1, 1.4]
22         [1.1]
23         [1.4]
24    [1.1, 1.4]
25         [1.4]
26    [1.1, 1.2]
27         [1.1]
28         [1.2]
29    [1.1, 1.2]
30         [1.4]
31    [1.2, 1.4]
32    [1.1, 1.2]
33    [1.1, 1.2]
34    [1.2, 1.4]
35    [1.1, 1.4]
36    [1.1, 1.4]
37    [1.1, 1.2]
38    [1.1, 2.2]
39    [1.1, 2.2]
40    [1.1, 1.2]
41    [1.1, 1.4]
42    [1.1, 2.1]
43    [1.1, 1.4]
44    [1.4, 2.2]
45    [1.2, 1.4]
46    [1.2, 2.2]
47    [1.2, 1.4]
48    [1.4, 2.1]
Name: tracked_subject, dtype: object

In [30]:
# Turn each list of tracked subject ids (e.g. [1.1, 1.2]) into one "_"-joined string.
LFP_SPECTRAL_DF["tracked_subject"] = LFP_SPECTRAL_DF["tracked_subject"].apply(
    lambda subjects: "_".join(map(str, subjects))
)

In [31]:
# Keep the joined subject string under a second name. TRIAL_LABELS_DF also has a
# `tracked_subject` column, so in the merge below the LFP one receives the "_y" suffix.
LFP_SPECTRAL_DF["current_tracked_subject"] = LFP_SPECTRAL_DF["tracked_subject"]

In [32]:
# Collapse the trial rows into one row per (video_name, current_subject) pair.
# Every column that is not a grouping key is aggregated into a list of the group's values.
group_keys = ["video_name", "current_subject"]
list_aggregations = {
    column: list
    for column in TRIAL_LABELS_DF.columns
    if column not in group_keys
}
GROUPED_TRIAL_LABELS_DF = (
    TRIAL_LABELS_DF
    .groupby(group_keys)
    .agg(list_aggregations)
    .reset_index()
)

In [33]:
# `session_dir` was aggregated into a list; keep only its first element.
# Presumably all trials of a group share one session_dir — not checked here.
GROUPED_TRIAL_LABELS_DF["session_dir"] = GROUPED_TRIAL_LABELS_DF["session_dir"].apply(lambda x: x[0])


In [34]:
# `sleap_name` was aggregated into a list; keep only its first element.
# Presumably all trials of a group share one sleap_name — not checked here.
GROUPED_TRIAL_LABELS_DF["sleap_name"] = GROUPED_TRIAL_LABELS_DF["sleap_name"].apply(lambda x: x[0])

In [35]:
# Expose the aggregated per-trial `notes` lists under the additional name `trial_notes`.
GROUPED_TRIAL_LABELS_DF["trial_notes"] = GROUPED_TRIAL_LABELS_DF["notes"]

In [36]:
GROUPED_TRIAL_LABELS_DF.head()

Unnamed: 0,video_name,current_subject,tracked_subject,box_number,sleap_name,tone_start_frame,reward_start_frame,tone_stop_frame,condition,competition_closeness,notes,experiment,session_dir,all_subjects,tone_start_timestamp,tone_stop_timestamp,trial_label,trial_notes
0,20230612_112630_standard_comp_to_training_D1_s...,1.1,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230612_112630_standard_comp_to_training_D1_s...,"[1125, 3519, 5815, 7612, 8709, 9708, 11604, 13...","[1225, 3619, 5915, 7712, 8809, 9808, 11704, 13...","[1324, 3720, 6014, 7811, 8910, 9907, 11804, 13...","[1.1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, ...","[competitive, no_comp, competitive, no_comp, n...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[standard, standard, standard, standard, stand...",20230612_112630_standard_comp_to_training_D1_s...,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1126742, 3526740, 5826740, 7626736, 8726734, ...","[1326741, 3726740, 6026737, 7826735, 8926734, ...","[win, lose, lose, lose, lose, lose, lose, lose...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
1,20230612_112630_standard_comp_to_training_D1_s...,1.2,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230612_112630_standard_comp_to_training_D1_s...,"[1125, 3519, 5815, 7612, 8709, 9708, 11604, 13...","[1225, 3619, 5915, 7712, 8809, 9808, 11704, 13...","[1324, 3720, 6014, 7811, 8910, 9907, 11804, 13...","[1.1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, ...","[competitive, no_comp, competitive, no_comp, n...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[standard, standard, standard, standard, stand...",20230612_112630_standard_comp_to_training_D1_s...,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1126742, 3526740, 5826740, 7626736, 8726734, ...","[1326741, 3726740, 6026737, 7826735, 8926734, ...","[lose, win, win, win, win, win, win, win, win,...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
2,20230612_112630_standard_comp_to_training_D1_s...,1.1,"[['1.1'], ['1.1'], ['1.1'], ['1.1'], ['1.1'], ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",20230612_112630_standard_comp_to_training_D1_s...,"[37056, 38254, 39352, 40650, 41747, 42746, 437...","[37156, 38354, 39452, 40750, 41847, 42846, 438...","[37256, 38453, 39552, 40849, 41947, 42945, 439...","[rewarded, rewarded, rewarded, rewarded, rewar...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[standard, standard, standard, standard, stand...",20230612_112630_standard_comp_to_training_D1_s...,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[37126704, 38326703, 39426702, 40726701, 41826...","[37326704, 38526703, 39626701, 40926700, 42026...","[rewarded, rewarded, rewarded, rewarded, rewar...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
3,20230613_105657_standard_comp_to_training_D2_s...,1.1,"[['1.1', '1.4'], ['1.1', '1.4'], ['1.1', '1.4'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230613_105657_standard_comp_to_training_D2_s...,"[916, 3312, 5608, 7405, 8502, 9501, 11397, 128...","[1016, 3412, 5708, 7505, 8602, 9601, 11497, 12...","[1117, 3513, 5808, 7604, 8703, 9700, 11596, 13...","[1.4, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, ...","[no_comp, no_comp, competitive, competitive, c...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[standard, standard, standard, standard, stand...",20230613_105657_standard_comp_to_training_D2_s...,"[['1.1', '1.4'], ['1.1', '1.4'], ['1.1', '1.4'...","[918755, 3318755, 5618754, 7418755, 8518752, 9...","[1118758, 3518757, 5818753, 7618752, 8718757, ...","[lose, win, win, win, win, win, win, win, win,...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
4,20230613_105657_standard_comp_to_training_D2_s...,1.4,"[['1.1', '1.4'], ['1.1', '1.4'], ['1.1', '1.4'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230613_105657_standard_comp_to_training_D2_s...,"[916, 3312, 5608, 7405, 8502, 9501, 11397, 128...","[1016, 3412, 5708, 7505, 8602, 9601, 11497, 12...","[1117, 3513, 5808, 7604, 8703, 9700, 11596, 13...","[1.4, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, ...","[no_comp, no_comp, competitive, competitive, c...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[standard, standard, standard, standard, stand...",20230613_105657_standard_comp_to_training_D2_s...,"[['1.1', '1.4'], ['1.1', '1.4'], ['1.1', '1.4'...","[918755, 3318755, 5618754, 7418755, 8518752, 9...","[1118758, 3518757, 5818753, 7618752, 8718757, ...","[win, lose, lose, lose, lose, lose, lose, lose...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


## Merging the trial labels with the LFP spectral data

In [37]:
# Inner join: only (current_subject, video_name) pairs present in both tables survive.
# Columns that exist on both sides keep their name for the trial-label version and get
# a "_y" suffix for the LFP version.
TRIALS_AND_SPECTRAL_DF = GROUPED_TRIAL_LABELS_DF.merge(
    LFP_SPECTRAL_DF,
    how="inner",
    on=["current_subject", "video_name"],
    suffixes=("", "_y"),
)

In [38]:
for col in TRIALS_AND_SPECTRAL_DF:
    print(col)

video_name
current_subject
tracked_subject
box_number
sleap_name
tone_start_frame
reward_start_frame
tone_stop_frame
condition
competition_closeness
notes
experiment
session_dir
all_subjects
tone_start_timestamp
tone_stop_timestamp
trial_label
trial_notes
cohort
session_dir_y
tone_frames
box_1_port_entry_frames
box_2_port_entry_frames
session_path
recording
subject
all_subjects_y
first_timestamp
last_timestamp
video_timestamps
tone_timestamps
box_1_port_entry_timestamps
box_2_port_entry_timestamps
lfp_timestamps
mPFC_theta_band
MD_theta_band
LH_theta_band
BLA_theta_band
vHPC_theta_band
mPFC_gamma_band
MD_gamma_band
LH_gamma_band
BLA_gamma_band
vHPC_gamma_band
mPFC_theta_phase
MD_theta_phase
LH_theta_phase
BLA_theta_phase
vHPC_theta_phase
mPFC_gamma_phase
MD_gamma_phase
LH_gamma_phase
BLA_gamma_phase
vHPC_gamma_phase
mPFC_RMS_filtered_power_all_frequencies_all_windows
MD_RMS_filtered_power_all_frequencies_all_windows
LH_RMS_filtered_power_all_frequencies_all_windows
BLA_RMS_filtered_pow

In [39]:
# Discard the duplicate columns that the merge marked with the "_y" suffix.
# Only the end of the name is tested: a substring test ('"_y" in col') would also drop
# any column that merely contains "_y" somewhere in its name.
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(
    columns=[col for col in TRIALS_AND_SPECTRAL_DF.columns if col.endswith("_y")],
    errors="ignore",
)

In [40]:
# Combine each row's start and stop frame, both reduced by one, into a (start, stop) tuple.
# NOTE(review): the -1 presumably converts 1-based frame numbers into 0-based indices — confirm.
# `start_frame` / `stop_frame` are expected to come from LFP_SPECTRAL_DF; they are not
# visible in the truncated column listing above.
TRIALS_AND_SPECTRAL_DF["start_stop_frame"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: (x["start_frame"]-1, x["stop_frame"]-1), axis=1)


In [41]:
# Look up the video timestamps at each row's start and stop frame.
# extract_start_stop_elements uses the last timestamp whenever the stop index is negative
# or not smaller than the length of `video_timestamps`; the start index is used as is.
TRIALS_AND_SPECTRAL_DF["start_stop_timestamps"] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: extract_start_stop_elements(x["video_timestamps"], x["start_stop_frame"][0], x["start_stop_frame"][1]), axis=1)


- Setting every element of each NumPy array that lies outside the start/stop slice to NaN

In [42]:
# Names of the tracking-derived columns that get masked to the start/stop frame window:
# one subject column and one agent column per measure.
sleap_columns = [
    f"{animal}_{measure}"
    for measure in ("locations", "thorax_velocity", "thorax_to_reward_port")
    for animal in ("subject", "agent")
]

In [43]:
# For every tracking column, keep only the entries inside the row's (start, stop) frame
# window and set the rest to NaN (see mask_slices).
# Cells that do not hold an array (e.g. the NaN agent_* values visible in the head() output
# above) come back from mask_slices as NaN; the warning it emits for them is silenced by
# the warnings.filterwarnings('ignore') call at the top of the notebook.
for col in sleap_columns:
    TRIALS_AND_SPECTRAL_DF[col] = TRIALS_AND_SPECTRAL_DF.apply(lambda x: mask_slices(x[col], x["start_stop_frame"]), axis=1)


- Filtering for frames

In [44]:
# Remove columns that are no longer needed; start/stop frames now live in `start_stop_frame`.
# errors="ignore" makes the call succeed even if some of these columns are already absent.
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=["tone_frames", "start_frame", "stop_frame"], errors="ignore")

In [45]:
import numpy as np

def filter_array_by_values(arr, start_value=0, stop_value=1000000):
    """
    Keep the entries of a 1D array, or the rows of a 2D array, that lie strictly
    between two bounds.

    For a 2D input the comparison uses the first column of each row.

    Parameters:
    - arr (array-like): Input converted with ``np.array``; must be 1D or 2D.
    - start_value (numeric): Exclusive lower bound. Default is 0.
    - stop_value (numeric): Exclusive upper bound. Default is 1000000.

    Returns:
    - tuple: ``(filtered, mask)`` where ``filtered`` holds the kept entries/rows
      and ``mask`` is the boolean array that selected them.

    Raises:
    - ValueError: If the converted input is neither 1D nor 2D.
    """
    values = np.array(arr)

    if values.ndim not in (1, 2):
        raise ValueError("The input array must be either 1D or 2D.")

    # 1D: compare the values themselves; 2D: compare the first column of each row.
    keys = values if values.ndim == 1 else values[:, 0]
    keep = (keys > start_value) & (keys < stop_value)
    return values[keep], keep


In [46]:
def _tone_mask_for_row(row):
    """Return the boolean mask filter_array_by_values produces for the row's tone start frames."""
    bounds = row["start_stop_frame"]
    filtered_and_mask = filter_array_by_values(row["tone_start_frame"], bounds[0], bounds[1])
    return filtered_and_mask[1]


TRIALS_AND_SPECTRAL_DF["tone_mask"] = TRIALS_AND_SPECTRAL_DF.apply(_tone_mask_for_row, axis=1)
                                                                                                   

In [47]:
def _frames_within_bounds(row, column_name):
    """Return the filtered values filter_array_by_values keeps for one frame column of the row."""
    bounds = row["start_stop_frame"]
    filtered_and_mask = filter_array_by_values(row[column_name], bounds[0], bounds[1])
    return filtered_and_mask[0]


# Every column with "frame" in its name is filtered, except the bounds column itself.
frame_columns = [name for name in TRIALS_AND_SPECTRAL_DF if "frame" in name and name != "start_stop_frame"]
for column in frame_columns:
    TRIALS_AND_SPECTRAL_DF[column] = TRIALS_AND_SPECTRAL_DF.apply(_frames_within_bounds, axis=1, args=(column,))
                                                                                                   

In [50]:
def _condition_under_tone_mask(row):
    """Index the row's condition labels (as a numpy array) with the row's tone mask."""
    labels = np.array(row["condition"])
    return labels[row["tone_mask"]]


TRIALS_AND_SPECTRAL_DF["condition"] = TRIALS_AND_SPECTRAL_DF.apply(_condition_under_tone_mask, axis=1)

In [51]:
TRIALS_AND_SPECTRAL_DF["condition"]

0     [1.1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, ...
1     [rewarded, rewarded, rewarded, rewarded, rewar...
2     [1.1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, ...
3     [rewarded, rewarded, rewarded, rewarded, rewar...
4     [rewarded, rewarded, rewarded, rewarded, rewar...
5     [1.4, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, ...
6     [1.4, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, ...
7     [rewarded, rewarded, rewarded, rewarded, rewar...
8     [rewarded, rewarded, rewarded, rewarded, rewar...
9     [1.2, 1.2, 1.1, 1.1, 1.2, 1.1, 1.1, 1.1, 1.1, ...
10    [1.2, 1.2, 1.1, 1.1, 1.2, 1.1, 1.1, 1.1, 1.1, ...
11    [rewarded, rewarded, rewarded, rewarded, rewar...
12    [rewarded, rewarded, rewarded, rewarded, rewar...
13    [1.2, 1.2, 1.4, 1.2, 1.4, 1.2, 1.2, 1.4, 1.4, ...
14    [rewarded, rewarded, rewarded, rewarded, rewar...
15    [1.2, 1.2, 1.4, 1.2, 1.4, 1.2, 1.2, 1.4, 1.4, ...
16    [rewarded, rewarded, rewarded, rewarded, rewar...
17                                              

In [55]:
TRIALS_AND_SPECTRAL_DF["tone_mask"].iloc[17]

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False])

In [None]:
# NOTE(review): bare raise with no message - presumably a manual stop so that
# "Run All" halts here; confirm, then remove it or give it a message.
raise ValueError()

In [49]:
TRIALS_AND_SPECTRAL_DF

Unnamed: 0,video_name,current_subject,tracked_subject,box_number,sleap_name,tone_start_frame,reward_start_frame,tone_stop_frame,condition,competition_closeness,...,subject_locations,agent_locations,subject_thorax_velocity,agent_thorax_velocity,subject_thorax_to_reward_port,agent_thorax_to_reward_port,current_tracked_subject,start_stop_frame,start_stop_timestamps,tone_mask
0,20230612_112630_standard_comp_to_training_D1_s...,1.1,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230612_112630_standard_comp_to_training_D1_s...,"[1125, 3519, 5815, 7612, 8709, 9708, 11604, 13...","[1225, 3619, 5915, 7712, 8809, 9808, 11704, 13...","[1324, 3720, 6014, 7811, 8910, 9907, 11804, 13...","[1.1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, ...","[competitive, no_comp, competitive, no_comp, n...",...,"[[[46.68890098287189, 11.495085446056166], [43...","[[[34.36404036584844, 35.615553316175784], [34...","[1.8125, 1.707, 1.6045, 1.506, 1.409, 1.315, 1...","[14.336, 12.875, 11.47, 10.11, 8.81, 7.57, 6.3...","[26.45, 26.16, 26.1, 25.77, 25.72, 25.78, 26.1...","[6.11, 6.06, 6.125, 6.566, 6.605, 6.676, 7.13,...",1.1_1.2,"(0, 32299)","(1384, 32361438)","[True, True, True, True, True, True, True, Tru..."
1,20230612_112630_standard_comp_to_training_D1_s...,1.2,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230612_112630_standard_comp_to_training_D1_s...,"[37101, 38299, 39397, 40695, 41792, 42790, 437...","[37201, 38399, 39497, 40795, 41892, 42890, 438...","[37300, 38498, 39597, 40894, 41992, 42990, 439...","[1.1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, ...","[competitive, no_comp, competitive, no_comp, n...",...,"[[[nan, nan], [nan, nan], [nan, nan], [nan, na...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,1.2,"(32699, 68256)","(32717606, 68341384)","[False, False, False, False, False, False, Fal..."
2,20230612_112630_standard_comp_to_training_D1_s...,1.2,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230612_112630_standard_comp_to_training_D1_s...,"[1125, 3519, 5815, 7612, 8709, 9708, 11604, 13...","[1225, 3619, 5915, 7712, 8809, 9808, 11704, 13...","[1324, 3720, 6014, 7811, 8910, 9907, 11804, 13...","[1.1, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, ...","[competitive, no_comp, competitive, no_comp, n...",...,"[[[34.36404036584844, 35.615553316175784], [34...","[[[46.68890098287189, 11.495085446056166], [43...","[14.336, 12.875, 11.47, 10.11, 8.81, 7.57, 6.3...","[1.8125, 1.707, 1.6045, 1.506, 1.409, 1.315, 1...","[6.11, 6.06, 6.125, 6.566, 6.605, 6.676, 7.13,...","[26.45, 26.16, 26.1, 25.77, 25.72, 25.78, 26.1...",1.1_1.2,"(0, 32299)","(1384, 32361438)","[True, True, True, True, True, True, True, Tru..."
3,20230612_112630_standard_comp_to_training_D1_s...,1.1,"[['1.1'], ['1.1'], ['1.1'], ['1.1'], ['1.1'], ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",20230612_112630_standard_comp_to_training_D1_s...,"[37056, 38254, 39352, 40650, 41747, 42746, 437...","[37156, 38354, 39452, 40750, 41847, 42846, 438...","[37256, 38453, 39552, 40849, 41947, 42945, 439...","[rewarded, rewarded, rewarded, rewarded, rewar...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",...,"[[[nan, nan], [nan, nan], [nan, nan], [nan, na...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,1.1,"(32999, 68211)","(33062688, 68339998)","[True, True, True, True, True, True, True, Tru..."
4,20230613_105657_standard_comp_to_training_D2_s...,1.1,"[['1.1', '1.4'], ['1.1', '1.4'], ['1.1', '1.4'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230613_105657_standard_comp_to_training_D2_s...,"[36889, 38087, 39185, 40482, 41579, 42578, 435...","[36989, 38187, 39285, 40582, 41679, 42678, 436...","[37088, 38286, 39384, 40681, 41780, 42777, 437...","[1.4, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, ...","[no_comp, no_comp, competitive, competitive, c...",...,"[[[nan, nan], [nan, nan], [nan, nan], [nan, na...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,1.1,"(33399, 68331)","(33423014, 68420378)","[False, False, False, False, False, False, Fal..."
5,20230613_105657_standard_comp_to_training_D2_s...,1.1,"[['1.1', '1.4'], ['1.1', '1.4'], ['1.1', '1.4'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230613_105657_standard_comp_to_training_D2_s...,"[916, 3312, 5608, 7405, 8502, 9501, 11397, 128...","[1016, 3412, 5708, 7505, 8602, 9601, 11497, 12...","[1117, 3513, 5808, 7604, 8703, 9700, 11596, 13...","[1.4, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, ...","[no_comp, no_comp, competitive, competitive, c...",...,"[[[46.71964110607613, 36.493379374373234], [44...","[[[50.63276479731981, 10.511614189582007], [51...","[4.04, 3.848, 3.658, 3.475, 3.291, 3.113, 2.94...","[4.402, 3.873, 3.361, 2.87, 2.398, 1.946, 1.51...","[4.633, 4.09, 4.36, 4.656, 4.65, 4.383, 4.35, ...","[28.12, 28.16, 28.22, 28.06, 27.9, 27.9, 27.9,...",1.1_1.4,"(0, 32999)","(1384, 33061302)","[True, True, True, True, True, True, True, Tru..."
6,20230613_105657_standard_comp_to_training_D2_s...,1.4,"[['1.1', '1.4'], ['1.1', '1.4'], ['1.1', '1.4'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230613_105657_standard_comp_to_training_D2_s...,"[916, 3312, 5608, 7405, 8502, 9501, 11397, 128...","[1016, 3412, 5708, 7505, 8602, 9601, 11497, 12...","[1117, 3513, 5808, 7604, 8703, 9700, 11596, 13...","[1.4, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, ...","[no_comp, no_comp, competitive, competitive, c...",...,"[[[50.63276479731981, 10.511614189582007], [51...","[[[46.71964110607613, 36.493379374373234], [44...","[4.402, 3.873, 3.361, 2.87, 2.398, 1.946, 1.51...","[4.04, 3.848, 3.658, 3.475, 3.291, 3.113, 2.94...","[28.12, 28.16, 28.22, 28.06, 27.9, 27.9, 27.9,...","[4.633, 4.09, 4.36, 4.656, 4.65, 4.383, 4.35, ...",1.1_1.4,"(0, 32999)","(1384, 33061302)","[True, True, True, True, True, True, True, Tru..."
7,20230613_105657_standard_comp_to_training_D2_s...,1.4,"[['1.4'], ['1.4'], ['1.4'], ['1.4'], ['1.4'], ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",20230613_105657_standard_comp_to_training_D2_s...,"[36849, 38047, 39145, 40443, 41540, 42538, 435...","[36949, 38147, 39245, 40543, 41640, 42638, 436...","[37048, 38246, 39345, 40642, 41740, 42738, 437...","[rewarded, rewarded, rewarded, rewarded, rewar...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",...,"[[[nan, nan], [nan, nan], [nan, nan], [nan, na...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,1.4,"(33699, 68291)","(33762552, 68420378)","[True, True, True, True, True, True, True, Tru..."
8,20230614_114041_standard_comp_to_training_D3_s...,1.1,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230614_114041_standard_comp_to_training_D3_s...,"[36875, 38073, 39171, 40468, 41567, 42564, 43563]","[36975, 38173, 39271, 40568, 41667, 42664, 43663]","[37075, 38273, 39370, 40667, 41766, 42764, 43762]","[1.2, 1.2, 1.1, 1.1, 1.2, 1.1, 1.1, 1.1, 1.1, ...","[competitive, no_comp, no_comp, competitive, c...",...,"[[[nan, nan], [nan, nan], [nan, nan], [nan, na...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...",,1.1,"(32799, 44204)","(32773048, 44199536)","[False, False, False, False, False, False, Fal..."
9,20230614_114041_standard_comp_to_training_D3_s...,1.1,"[['1.1', '1.2'], ['1.1', '1.2'], ['1.1', '1.2'...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",20230614_114041_standard_comp_to_training_D3_s...,"[855, 3251, 5546, 7343, 8441, 9439, 11335, 128...","[955, 3351, 5646, 7443, 8541, 9539, 11435, 129...","[1055, 3450, 5747, 7542, 8640, 9639, 11536, 13...","[1.2, 1.2, 1.1, 1.1, 1.2, 1.1, 1.1, 1.1, 1.1, ...","[competitive, no_comp, no_comp, competitive, c...",...,"[[[44.962146067447776, 33.875261125037554], [4...","[[[52.756810221797416, 36.78031943889759], [52...","[8.55, 7.92, 7.3, 6.69, 6.09, 5.496, 4.918, 4....","[31.56, 28.44, 25.4, 22.5, 19.67, 16.97, 14.37...","[5.957, 5.52, 5.055, 4.99, 4.484, 4.047, 4.195...","[10.77, 10.75, 10.76, 10.91, 10.95, 11.234, 11...",1.1_1.2,"(0, 32199)","(-2, 32260270)","[True, True, True, True, True, True, True, Tru..."


In [48]:
# NOTE(review): bare raise with no message - presumably a manual stop so that
# "Run All" halts here; confirm, then remove it or give it a message.
raise ValueError()

ValueError: 

In [None]:
TRIALS_AND_SPECTRAL_DF.head()

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=[col for col in TRIALS_AND_SPECTRAL_DF if "band" in col], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=[col for col in TRIALS_AND_SPECTRAL_DF if "phase" in col], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=[col for col in TRIALS_AND_SPECTRAL_DF if "windows" in col], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=[col for col in TRIALS_AND_SPECTRAL_DF if "theta" in col], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=[col for col in TRIALS_AND_SPECTRAL_DF if "gamma" in col], errors="ignore")

In [None]:
for col in TRIALS_AND_SPECTRAL_DF:
    print(col)

In [None]:
def _first_tracked_subject(values):
    """Return the entry at position 0 of a row's tracked_subject sequence."""
    return values[0]


TRIALS_AND_SPECTRAL_DF["tracked_subject"] = TRIALS_AND_SPECTRAL_DF["tracked_subject"].apply(_first_tracked_subject)

In [None]:
def _first_box_number(values):
    """Return the entry at position 0 of a row's box_number sequence."""
    return values[0]


TRIALS_AND_SPECTRAL_DF["box_number"] = TRIALS_AND_SPECTRAL_DF["box_number"].apply(_first_box_number)

In [None]:
def _first_experiment(values):
    """Return the entry at position 0 of a row's experiment sequence."""
    return values[0]


TRIALS_AND_SPECTRAL_DF["experiment"] = TRIALS_AND_SPECTRAL_DF["experiment"].apply(_first_experiment)

In [None]:
TRIALS_AND_SPECTRAL_DF.iloc[:, :10].head()

In [None]:
TRIALS_AND_SPECTRAL_DF.iloc[:, 10:20].head()

In [None]:
TRIALS_AND_SPECTRAL_DF.iloc[:, 20:30].head()

In [None]:
TRIALS_AND_SPECTRAL_DF.iloc[:, 30:40].head()

In [None]:
TRIALS_AND_SPECTRAL_DF.iloc[:, 40:50].head()

In [None]:
TRIALS_AND_SPECTRAL_DF.iloc[:, 50:60].head()

In [None]:
def _baseline_video_window(row, item_col):
    """Run filter_by_timestamp_range for one SLEAP column over the row's baseline window.

    The result is returned untouched; this cell reads index 0 as the filtered
    timestamps and index 1 as the filtered items.
    """
    return utilities.helper.filter_by_timestamp_range(
        start=row["baseline_start_timestamp"],
        stop=row["baseline_stop_timestamp"],
        timestamps=row["video_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "baseline_video_timestamps"

for col in sorted(sleap_columns):
    updated_item_col = "baseline_{}".format(col)
    print(updated_item_col)
    if "agent" in col:
        # Rows whose "agent" entry is falsy get NaN instead of a filtered array.
        TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
            lambda x: _baseline_video_window(x, col)[1] if x["agent"] else np.nan, axis=1)
    else:
        TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
            lambda x: _baseline_video_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if sleap_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_video_window(x, col)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF["video_timestamps"]

In [None]:
TRIALS_AND_SPECTRAL_DF.shape

In [None]:
for vid in TRIALS_AND_SPECTRAL_DF["video_name"]:
    print(vid)

In [None]:
# NOTE(review): bare raise with no message - presumably a manual stop so that
# "Run All" halts here; confirm, then remove it or give it a message.
raise ValueError()

- Merging rows

In [None]:
for vid in TRIALS_AND_SPECTRAL_DF["video_name"]:
    print(vid)

In [None]:
for col in TRIALS_AND_SPECTRAL_DF:
    print(col)

In [None]:
# Columns shown before and after the row merge (see the cells that index
# TRIALS_AND_SPECTRAL_DF with this list).
drop_columns = list((
    "video_name",
    "tracked_subject",
    "box_number",
    "sleap_name",
    "start_frame",
    "stop_frame",
    "in_video_subjects",
    "current_tracked_subject",
    "start_stop_frame",
))

In [None]:
TRIALS_AND_SPECTRAL_DF[drop_columns]

In [None]:
# Every column is aggregated with "first", except the names listed here,
# which are left out of the mapping at this point.
_not_first_aggregated = ('subject_locations', "current_subject", "session_dir")
aggregation_dict = dict.fromkeys(
    (name for name in TRIALS_AND_SPECTRAL_DF if name not in _not_first_aggregated),
    "first",
)

In [None]:
aggregation_dict["subject_locations"] = list

In [None]:
# One output row per (current_subject, session_dir) pair; the group keys are
# turned back into ordinary columns afterwards.
rows_by_subject_and_session = TRIALS_AND_SPECTRAL_DF.groupby(["current_subject", "session_dir"])
aggregated_rows = rows_by_subject_and_session.agg(aggregation_dict)
TRIALS_AND_SPECTRAL_DF = aggregated_rows.reset_index()

In [None]:
TRIALS_AND_SPECTRAL_DF[drop_columns].head()

In [None]:

body_parts
box_top_left
box_top_right
reward_port
box_bottom_left
box_bottom_right
agent

recording_name

In [None]:
agent_locations
subject_thorax_velocity
agent_thorax_velocity
subject_thorax_to_reward_port
agent_thorax_to_reward_port

In [None]:
for col in TRIALS_AND_SPECTRAL_DF:
    print(col)

In [None]:
# NOTE(review): bare raise with no message - presumably a manual stop so that
# "Run All" halts here; confirm, then remove it or give it a message.
raise ValueError()

In [None]:
TRIALS_AND_SPECTRAL_DF

In [None]:
def combine_grouped_rows(df, array_columns):
    """
    Combine rows that share an index value into a single row.

    Columns named in `array_columns` are folded pairwise with `overlay_arrays`
    (defined elsewhere in this notebook) after null entries are dropped; every
    other column keeps the first value of its group.

    Parameters:
    - df (pd.DataFrame): DataFrame to process; rows are grouped by `df.index`.
    - array_columns (list): Names of the columns in `df` whose array values are
      combined with `overlay_arrays`.

    Returns:
    - pd.DataFrame: One row per distinct index value.
    """
    # Local import keeps this cell self-contained.
    from functools import reduce

    def custom_aggregator(x):
        if x.name in array_columns:
            arrays = list(x.dropna())
            if not arrays:
                # Nothing left to overlay in this group.
                return np.nan
            # Neither Series nor ndarray has a .reduce method, so fold explicitly:
            # overlay_arrays(overlay_arrays(a0, a1), a2) ...
            return reduce(overlay_arrays, arrays)
        # For other columns, simply return the first element
        return x.iloc[0]

    # Apply the custom aggregator to each column individually
    return df.groupby(df.index).aggregate(custom_aggregator)


In [None]:
combine_grouped_rows(TRIALS_AND_SPECTRAL_DF, ["subject_locations"])

In [None]:
TRIALS_AND_SPECTRAL_DF.head()

In [None]:
overlay_arrays(TRIALS_AND_SPECTRAL_DF["subject_locations"].iloc[0], TRIALS_AND_SPECTRAL_DF["subject_locations"].iloc[1])

In [None]:
TRIALS_AND_SPECTRAL_DF["subject_locations"].iloc[0].shape

In [None]:
example_arr.shape

In [None]:
mask_slices(example_arr, (41000, 79050))

In [None]:
# Create a 2D array. It must be float: NaN cannot be stored in an integer
# array, and assigning it there raises a ValueError.
array_2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)

# Slice definition for rows and columns
row_start, row_end = 1, 3
col_start, col_end = 0, 2

# Create mask for rows and columns: True marks the cells to blank out,
# False marks the cells inside the slice that are kept.
mask_2d = np.ones(array_2d.shape, dtype=bool)
mask_2d[row_start:row_end, col_start:col_end] = False

# Set values outside the defined slice to NaN
array_2d[mask_2d] = np.nan

print(array_2d)

In [None]:
# NOTE(review): bare raise with no message - presumably a manual stop so that
# "Run All" halts here; confirm, then remove it or give it a message.
raise ValueError()

In [None]:
for index, row in TRIALS_AND_SPECTRAL_DF.iterrows():
    print(row["video_name"])

In [None]:
TRIALS_AND_SPECTRAL_DF.head()

In [None]:
# NOTE(review): bare raise with no message - presumably a manual stop so that
# "Run All" halts here; confirm, then remove it or give it a message.
raise ValueError()

In [None]:
# Inner join of the trial labels with the spectral table on subject and video.
# Overlapping column names keep the left-hand name; the right-hand copy gets "_y".
TRIALS_AND_SPECTRAL_DF = TRIAL_LABELS_DF.merge(
    LFP_SPECTRAL_DF,
    how="inner",
    on=["current_subject", "video_name"],
    suffixes=('', '_y'),
)

In [None]:
# TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=[col for col in TRIALS_AND_SPECTRAL_DF.columns if "trace" in col], errors="ignore")

In [None]:
# Drop the right-hand duplicates that the merge tagged with the "_y" suffix.
# Only the suffix is matched, so a column that merely contains "_y" somewhere
# in the middle of its name is kept.
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=[col for col in TRIALS_AND_SPECTRAL_DF.columns if str(col).endswith("_y")], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF.head()

In [None]:
TRIALS_AND_SPECTRAL_DF.columns

In [None]:
TRIALS_AND_SPECTRAL_DF

# Calculating the relevant timestamps

In [None]:
# Baseline window opens 30 * 20000 timestamp units before tone onset
# (presumably 30 s at a 20 kHz clock - TODO confirm the sampling rate).
baseline_start_offset = 30 * 20000
TRIALS_AND_SPECTRAL_DF["baseline_start_timestamp"] = TRIALS_AND_SPECTRAL_DF["tone_start_timestamp"] - baseline_start_offset

In [None]:
# Baseline window closes 20 * 20000 timestamp units before tone onset
# (presumably 20 s at a 20 kHz clock - TODO confirm the sampling rate).
baseline_stop_offset = 20 * 20000
TRIALS_AND_SPECTRAL_DF["baseline_stop_timestamp"] = TRIALS_AND_SPECTRAL_DF["tone_start_timestamp"] - baseline_stop_offset

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_start_timestamp"].head()

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_stop_timestamp"].head()

In [None]:
TRIALS_AND_SPECTRAL_DF["tone_start_timestamp"].head()

In [None]:
TRIALS_AND_SPECTRAL_DF["tone_stop_timestamp"].head()

# Filtering the SLEAP poses to the baseline and trial windows

In [None]:
# One 1-based frame number per video timestamp of the row. np.arange builds the
# integer array directly and stays integer-typed even when a row has no timestamps.
TRIALS_AND_SPECTRAL_DF["video_frame"] = TRIALS_AND_SPECTRAL_DF["video_timestamps"].apply(lambda x: np.arange(1, len(x) + 1))

In [None]:
TRIALS_AND_SPECTRAL_DF["video_frame"].head()

In [None]:
# A column counts as SLEAP-derived when its name contains any of these markers.
_sleap_name_markers = ("locations", "velocity", "to_reward_port", "video_frame")
sleap_columns = [
    name
    for name in TRIALS_AND_SPECTRAL_DF.columns
    if any(marker in name for marker in _sleap_name_markers)
]

In [None]:
sleap_columns

In [None]:
TRIALS_AND_SPECTRAL_DF["video_timestamps"].iloc[0].shape

In [None]:
TRIALS_AND_SPECTRAL_DF["video_frame"].iloc[0].shape

In [None]:
TRIALS_AND_SPECTRAL_DF["subject_thorax_to_reward_port"].iloc[0].shape

In [None]:
def _baseline_video_window(row, item_col):
    """Run filter_by_timestamp_range for one SLEAP column over the row's baseline window.

    The result is returned untouched; this cell reads index 0 as the filtered
    timestamps and index 1 as the filtered items.
    """
    return utilities.helper.filter_by_timestamp_range(
        start=row["baseline_start_timestamp"],
        stop=row["baseline_stop_timestamp"],
        timestamps=row["video_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "baseline_video_timestamps"

for col in sorted(sleap_columns):
    updated_item_col = "baseline_{}".format(col)
    print(updated_item_col)
    if "agent" in col:
        # Rows whose "agent" entry is falsy get NaN instead of a filtered array.
        TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
            lambda x: _baseline_video_window(x, col)[1] if x["agent"] else np.nan, axis=1)
    else:
        TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
            lambda x: _baseline_video_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if sleap_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_video_window(x, col)[0], axis=1)

In [None]:
def _trial_video_window(row, item_col):
    """Run filter_by_timestamp_range for one SLEAP column over the row's tone window.

    The result is returned untouched; this cell reads index 0 as the filtered
    timestamps and index 1 as the filtered items.
    """
    return utilities.helper.filter_by_timestamp_range(
        start=row["tone_start_timestamp"],
        stop=row["tone_stop_timestamp"],
        timestamps=row["video_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "trial_video_timestamps"

for col in sorted(sleap_columns):
    updated_item_col = "trial_{}".format(col)
    print(updated_item_col)
    if "agent" in col:
        # Rows whose "agent" entry is falsy get NaN instead of a filtered array.
        TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
            lambda x: _trial_video_window(x, col)[1] if x["agent"] else np.nan, axis=1)
    else:
        TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
            lambda x: _trial_video_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if sleap_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_video_window(x, col)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF[TRIALS_AND_SPECTRAL_DF["trial_subject_thorax_velocity"].apply(lambda x: np.isnan(x).any())]["baseline_subject_locations"]

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=sleap_columns + ["video_timestamps"], errors="ignore")

In [None]:
for col in TRIALS_AND_SPECTRAL_DF.columns:
    print(col)

- Filtering coherence

In [None]:
# Coherence value columns: the name mentions "coherence" but is neither a
# timestamps column nor a calculation column.
coherence_columns = [
    name
    for name in TRIALS_AND_SPECTRAL_DF.columns
    if "coherence" in name
    if "timestamps" not in name
    if "calculation" not in name
]

In [None]:
coherence_columns

In [None]:
def _baseline_coherence_window(row, item_col):
    """Run filter_by_timestamp_range for one coherence column over the row's baseline window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["baseline_start_timestamp"],
        stop=row["baseline_stop_timestamp"],
        timestamps=row["coherence_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "baseline_coherence_timestamps"

for col in coherence_columns:
    # Output name: the source name with "all_windows" and edge underscores removed.
    brain_region = col.replace("all_windows", "_").strip("_")
    print(brain_region)

    updated_item_col = "baseline_{}".format(brain_region)
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_coherence_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if coherence_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_coherence_window(x, col)[0], axis=1)

In [None]:
def _trial_coherence_window(row, item_col):
    """Run filter_by_timestamp_range for one coherence column over the row's tone window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["tone_start_timestamp"],
        stop=row["tone_stop_timestamp"],
        timestamps=row["coherence_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "trial_coherence_timestamps"

for col in coherence_columns:
    # Output name: the source name with "all_windows" and edge underscores removed.
    brain_region = col.replace("all_windows", "_").strip("_")
    print(brain_region)

    updated_item_col = "trial_{}".format(brain_region)
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_coherence_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if coherence_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_coherence_window(x, col)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=coherence_columns + ["coherence_timestamps"], errors="ignore")

- Filtering Granger causality

In [None]:
# Granger value columns: the name mentions "granger" but is neither a
# timestamps column nor a calculation column.
granger_columns = [
    name
    for name in TRIALS_AND_SPECTRAL_DF.columns
    if "granger" in name
    if "timestamps" not in name
    if "calculation" not in name
]

In [None]:
granger_columns

In [None]:
def _baseline_granger_window(row, item_col):
    """Run filter_by_timestamp_range for one Granger column over the row's baseline window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["baseline_start_timestamp"],
        stop=row["baseline_stop_timestamp"],
        timestamps=row["granger_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "baseline_granger_timestamps"

for col in granger_columns:
    # Output name: the source name with "all_windows" and edge underscores removed.
    brain_region = col.replace("all_windows", "_").strip("_")
    print(brain_region)

    updated_item_col = "baseline_{}".format(brain_region)
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_granger_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if granger_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_granger_window(x, col)[0], axis=1)

In [None]:
def _trial_granger_window(row, item_col):
    """Run filter_by_timestamp_range for one Granger column over the row's tone window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["tone_start_timestamp"],
        stop=row["tone_stop_timestamp"],
        timestamps=row["granger_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "trial_granger_timestamps"

for col in granger_columns:
    # Output name: the source name with "all_windows" and edge underscores removed.
    brain_region = col.replace("all_windows", "_").strip("_")
    print(brain_region)

    updated_item_col = "trial_{}".format(brain_region)
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_granger_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if granger_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_granger_window(x, col)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=granger_columns + ["granger_timestamps"], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

- Filtering power

In [None]:
TRIALS_AND_SPECTRAL_DF.head()

In [None]:
# Power value columns: the name mentions "power" but is neither a
# timestamps column nor a calculation column.
power_columns = [
    name
    for name in TRIALS_AND_SPECTRAL_DF.columns
    if "power" in name
    if "timestamps" not in name
    if "calculation" not in name
]

In [None]:
power_columns

In [None]:
def _baseline_power_window(row, item_col):
    """Run filter_by_timestamp_range for one power column over the row's baseline window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["baseline_start_timestamp"],
        stop=row["baseline_stop_timestamp"],
        timestamps=row["power_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "baseline_power_timestamps"

for col in power_columns:
    # Output name: the source name with "all_windows" and edge underscores removed.
    brain_region = col.replace("all_windows", "_").strip("_")
    print(brain_region)

    updated_item_col = "baseline_{}".format(brain_region)
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_power_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if power_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_power_window(x, col)[0], axis=1)

In [None]:
def _trial_power_window(row, item_col):
    """Run filter_by_timestamp_range for one power column over the row's tone window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["tone_start_timestamp"],
        stop=row["tone_stop_timestamp"],
        timestamps=row["power_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "trial_power_timestamps"

for col in power_columns:
    # Output name: the source name with "all_windows" and edge underscores removed.
    brain_region = col.replace("all_windows", "_").strip("_")
    print(brain_region)

    updated_item_col = "trial_{}".format(brain_region)
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_power_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if power_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_power_window(x, col)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_power_timestamps"]

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=power_columns + ["power_timestamps"], errors="ignore")

In [None]:
for col in TRIALS_AND_SPECTRAL_DF:
    print(col)

# Filtering phase, band, and LFP trace columns

In [None]:
# Trace value columns: the name mentions "trace" but is neither a
# timestamps column nor a calculation column.
lfp_columns = [
    name
    for name in TRIALS_AND_SPECTRAL_DF.columns
    if "trace" in name
    if "timestamps" not in name
    if "calculation" not in name
]

In [None]:
# Band value columns: the name mentions "band" but is neither a
# timestamps column nor a calculation column.
band_columns = [
    name
    for name in TRIALS_AND_SPECTRAL_DF.columns
    if "band" in name
    if "timestamps" not in name
    if "calculation" not in name
]

In [None]:
# Phase value columns: the name mentions "phase" but is neither a
# timestamps column nor a calculation column.
phase_columns = [
    name
    for name in TRIALS_AND_SPECTRAL_DF.columns
    if "phase" in name
    if "timestamps" not in name
    if "calculation" not in name
]

In [None]:
# Extend the phase list with the band and trace columns so the loops below
# process all three groups. Re-running this cell appends them again.
phase_columns = [*phase_columns, *band_columns, *lfp_columns]

In [None]:
phase_columns

In [None]:
def _baseline_lfp_window(row, item_col):
    """Run filter_by_timestamp_range for one LFP-sampled column over the row's baseline window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["baseline_start_timestamp"],
        stop=row["baseline_stop_timestamp"],
        timestamps=row["lfp_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "baseline_lfp_timestamps"

for col in phase_columns:
    # Output name: the source name with "all_windows" and edge underscores removed.
    brain_region = col.replace("all_windows", "_").strip("_")
    print(brain_region)

    updated_item_col = "baseline_{}".format(brain_region)
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_lfp_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if phase_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _baseline_lfp_window(x, col)[0], axis=1)

In [None]:
def _trial_lfp_window(row, item_col):
    """Run filter_by_timestamp_range for one LFP-sampled column over the row's tone window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["tone_start_timestamp"],
        stop=row["tone_stop_timestamp"],
        timestamps=row["lfp_timestamps"],
        items=row[item_col],
    )


# The timestamp column name has no placeholder, so it is a plain literal set once.
updated_timestamp_col = "trial_lfp_timestamps"

for col in phase_columns:
    # Output name: the source name with "all_windows" and edge underscores removed.
    brain_region = col.replace("all_windows", "_").strip("_")
    print(brain_region)

    updated_item_col = "trial_{}".format(brain_region)
    TRIALS_AND_SPECTRAL_DF[updated_item_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_lfp_window(x, col)[1], axis=1)

# Timestamps are written once, from the last column processed. The guard keeps
# an empty column list from reusing a stale `col` left over from another cell.
if phase_columns:
    TRIALS_AND_SPECTRAL_DF[updated_timestamp_col] = TRIALS_AND_SPECTRAL_DF.apply(
        lambda x: _trial_lfp_window(x, col)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_lfp_timestamps"]

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=phase_columns + ["lfp_timestamps"], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

In [None]:
for col in TRIALS_AND_SPECTRAL_DF:
    print(col)

# Filtering out spikes

In [None]:
def _baseline_spike_times(row):
    """Call filter_spike_times over the row's baseline window and cast the result to int."""
    kept = filter_spike_times(
        row["spike_times"],
        start=row["baseline_start_timestamp"],
        stop=row["baseline_stop_timestamp"],
    )
    return kept.astype(int)


TRIALS_AND_SPECTRAL_DF["baseline_spike_times"] = TRIALS_AND_SPECTRAL_DF.apply(_baseline_spike_times, axis=1)


In [None]:
def _trial_spike_times(row):
    """Call filter_spike_times over the row's tone window and cast the result to int."""
    kept = filter_spike_times(
        row["spike_times"],
        start=row["tone_start_timestamp"],
        stop=row["tone_stop_timestamp"],
    )
    return kept.astype(int)


TRIALS_AND_SPECTRAL_DF["trial_spike_times"] = TRIALS_AND_SPECTRAL_DF.apply(_trial_spike_times, axis=1)


In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_start_timestamp"].head()

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_stop_timestamp"].head()

In [None]:
TRIALS_AND_SPECTRAL_DF["baseline_spike_times"].iloc[0]

In [None]:
def _baseline_neuron_average_window(row):
    """Run filter_by_timestamp_range on the transposed firing rates over the row's baseline window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["baseline_start_timestamp"],
        stop=row["baseline_stop_timestamp"],
        timestamps=row["neuron_average_timestamps"],
        items=row["neuron_average_fr"].T,
    )


# The firing rates are transposed before filtering, so the filtered items are
# transposed back to restore the orientation of "neuron_average_fr". This matches
# how "trial_neuron_average_fr" is built.
# NOTE(review): confirm no later cell relies on the un-transposed baseline layout.
TRIALS_AND_SPECTRAL_DF["baseline_neuron_average_fr"] = TRIALS_AND_SPECTRAL_DF.apply(
    lambda x: _baseline_neuron_average_window(x)[1].T, axis=1)
TRIALS_AND_SPECTRAL_DF["baseline_neuron_average_timestamp"] = TRIALS_AND_SPECTRAL_DF.apply(
    lambda x: _baseline_neuron_average_window(x)[0], axis=1)

In [None]:
def _trial_neuron_average_window(row):
    """Run filter_by_timestamp_range on the transposed firing rates over the row's tone window."""
    return utilities.helper.filter_by_timestamp_range(
        start=row["tone_start_timestamp"],
        stop=row["tone_stop_timestamp"],
        timestamps=row["neuron_average_timestamps"],
        items=row["neuron_average_fr"].T,
    )


# Index 1 (filtered items) is transposed back; index 0 holds the filtered timestamps.
TRIALS_AND_SPECTRAL_DF["trial_neuron_average_fr"] = TRIALS_AND_SPECTRAL_DF.apply(
    lambda x: _trial_neuron_average_window(x)[1].T, axis=1)
TRIALS_AND_SPECTRAL_DF["trial_neuron_average_timestamp"] = TRIALS_AND_SPECTRAL_DF.apply(
    lambda x: _trial_neuron_average_window(x)[0], axis=1)

In [None]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.drop(columns=["spike_clusters", "spike_times", "neuron_average_fr", "neuron_average_timestamps",], errors="ignore")

In [None]:
TRIALS_AND_SPECTRAL_DF["trial_neuron_average_fr"].iloc[0].shape

In [None]:
TRIALS_AND_SPECTRAL_DF["trial_neuron_average_timestamp"].iloc[0].shape

# Filtering for rows that are in the video

In [None]:
def _trial_starts_in_video(row):
    """Return True when the row's first trial frame lies within [start_frame, stop_frame].

    A row with no trial frames counts as not in the video instead of raising
    an IndexError on the `[0]` lookup.
    """
    frames = row["trial_video_frame"]
    if len(frames) == 0:
        return False
    return bool(row["start_frame"] <= frames[0] <= row["stop_frame"])


TRIALS_AND_SPECTRAL_DF["in_video"] = TRIALS_AND_SPECTRAL_DF.apply(_trial_starts_in_video, axis=1)

In [None]:
# Keep only the rows flagged as in-video. reset_index() without drop=True
# keeps the previous index as a regular column.
in_video_rows = TRIALS_AND_SPECTRAL_DF["in_video"]
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF.loc[in_video_rows].reset_index()

In [None]:
TRIALS_AND_SPECTRAL_DF

In [None]:
TRIALS_AND_SPECTRAL_DF.to_pickle(os.path.join(OUTPUT_DIR, FULL_LFP_TRACES_PKL))

In [None]:
# NOTE(review): bare raise with no message - presumably a manual stop so that
# "Run All" halts here; confirm, then remove it or give it a message.
raise ValueError()