# All oscillation analysis

Brief 1-2 sentence description of notebook.

In [1]:
# Imports of all used packages and libraries
import sys
import os
import glob
from collections import defaultdict

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import os
import collections
import itertools
from collections import defaultdict
from itertools import combinations

In [4]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import mannwhitneyu
# import seaborn as sns



In [5]:
import matplotlib
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import colorsys

In [6]:
FONTSIZE = 20

In [7]:
font = {'weight' : 'medium',
        'size'   : 20}

matplotlib.rc('font', **font)

In [8]:
# Define a function to horizontally stack arrays
def stack_arrays(arrays):
    return np.vstack(arrays)

In [9]:
# BAND_TO_FREQ_PLOT = {'theta': (4, 12), 'gamma': (30, 50)}
BAND_TO_FREQ_PLOT = {'theta': (4, 12)}
BAND_TO_FREQ_COLOR = {'theta': "#FFAF00", 'beta': "blue", 'gamma': "green"}

## Inputs & Data

In [10]:
EPHYS_SAMPLE_RATE = 20000

In [11]:
# GOOD_SUBJECTS = ["3.1", "3.3", "3.4", "4.2", "4.3", "5.2", "5.3"]
# GOOD_SUBJECTS = ["3.1", "4.2", "4.3"]
# GOOD_SUBJECTS = ["3.1", "3.3", "3.4", "4.2", "4.3"]
GOOD_SUBJECTS = ["3.1", "3.3", "3.4", "4.2", "4.3", "5.2", "5.3"]
# GOOD_SUBJECTS = ["3.1", "3.3", "3.4", "4.2", "4.3"]


Explanation of each input and where it comes from.

In [12]:
# TRIAL_LABELS_DF = pd.read_excel("/blue/npadillacoreano/ryoi360/projects/reward_comp/repos/reward_comp_ext/results/2024_06_26_sleap_clustering/data/rce_pilot_3_alone_comp_per_video_trial_labels.xlsx")
TRIALS_AND_SPECTRAL_DF = pd.read_pickle("./proc/cluster_exploded_TRIALS_AND_SPECTRAL_DF.pkl")

## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Functions 

In [13]:
def combine_dicts(dicts):
    """
    Combine lists from multiple dictionaries that share the same key.

    This function takes a list of dictionaries where each dictionary's values are lists of numbers.
    It merges these lists for each corresponding key across all dictionaries, producing a single
    dictionary where each key has a combined list of all numbers from the input dictionaries.

    Parameters:
        dicts (list of dict): A list of dictionaries with values as lists of numbers.

    Returns:
        dict: A dictionary with keys from the input dictionaries and values as merged lists 
        of numbers from all corresponding input dictionary values.
    """
    combined = defaultdict(list)
    for dictionary in dicts:
        for key, value in dictionary.items():
            combined[key].extend(value)
    
    return dict(combined)


# Example usage
list_of_dicts = [
    {'a': [1, 2], 'b': [3, 4]},
    {'a': [5], 'b': [6, 7]},
    {'a': [8, 9], 'c': [10]}
]

combined_dict = combine_dicts(list_of_dicts)
print(combined_dict)

{'a': [1, 2, 5, 8, 9], 'b': [3, 4, 6, 7], 'c': [10]}


In [14]:
def find_consecutive_ranges(numbers, min_length=1):
    """
    Finds the start and end indices for consecutive ranges of each number in a list where the range meets a minimum length.

    This function iterates through a list of integers and identifies ranges where the same integer appears consecutively
    and the length of this sequence meets or exceeds the specified minimum length.

    Parameters:
        numbers (list): A list of integers to analyze for consecutive ranges.
        min_length (int): The minimum length of a range for it to be included in the results.

    Returns:
        dict: A dictionary with integers as keys and a list of tuples (start, end) as values,
              where each tuple represents the start and end indices (inclusive) of consecutive ranges
              for that integer. Only ranges that meet or exceed the minimum length are included.
    """
    ranges = {}
    n = len(numbers)
    if n == 0:
        return ranges
    
    start = 0
    current = numbers[0]

    for i in range(1, n):
        if numbers[i] != current:
            if (i - start) >= min_length:
                if current not in ranges:
                    ranges[current] = []
                ranges[current].append((start, i - 1))
            current = numbers[i]
            start = i

    # Handle the last range
    if (n - start) >= min_length:
        if current not in ranges:
            ranges[current] = []
        ranges[current].append((start, n - 1))

    return ranges

# Example usage:
numbers = [1, 1, 2, 2, 2, 3, 3, 3, 3, 2, 2, 1, 1]
print(find_consecutive_ranges(numbers, min_length=3))

{2: [(2, 4)], 3: [(5, 8)]}


In [15]:
def update_tuples_in_dict(original_dict, reference_list):
    """
    Updates the values in the tuples within a dictionary by replacing indices with corresponding values from a reference list.
    
    This function iterates through each key-value pair in the original dictionary. Each value is expected to be a list of tuples,
    where each tuple contains indices. These indices are used to fetch corresponding values from the reference list, creating new tuples.
    
    Parameters:
        original_dict (dict): Dictionary whose values are lists of tuples. Each tuple consists of indices into the reference_list.
        reference_list (list): List of elements that are referenced by the indices in the tuples of the original_dict.
        
    Returns:
        dict: A dictionary with the same keys as original_dict but with tuples transformed to contain elements from reference_list
              based on the indices in the original tuples.
    """
    # Create a new dictionary to store the updated key-value pairs
    new_dict = {}
    for key, list_of_tuples in original_dict.items():
        # Process each tuple in the list associated with the current key
        updated_tuples = [
            tuple(reference_list[idx] for idx in tup) for tup in list_of_tuples
        ]
        new_dict[key] = updated_tuples
    
    return new_dict

# Example usage:
original_dict = {
    'a': [(0, 1), (2, 3)],
    'b': [(1, 3), (0, 2)]
}
reference_list = ['alpha', 'beta', 'gamma', 'delta']

updated_dict = update_tuples_in_dict(original_dict, reference_list)
print(updated_dict)

{'a': [('alpha', 'beta'), ('gamma', 'delta')], 'b': [('beta', 'delta'), ('alpha', 'gamma')]}


In [16]:
# def find_indices_within_ranges(ranges_dict, values):
#     """
#     Creates a dictionary mapping keys to sorted indices of values that fall within specified ranges.
    
#     Parameters:
#         ranges_dict (dict): A dictionary with keys and values as lists of tuples representing ranges.
#         values (list): A list of values to check against the ranges.
        
#     Returns:
#         dict: A dictionary where each key maps to a sorted list of indices for values within the ranges.
#     """
#     result_dict = {}
#     for key, ranges in ranges_dict.items():
#         matched_indices = []
#         for index, value in enumerate(values):
#             if any(start <= value <= end for start, end in ranges):
#                 matched_indices.append(index)
#         result_dict[key] = sorted(matched_indices)
#     return list(result_dict.items())

# # Example usage:
# ranges_dict = {
#     'range1': [(1, 5), (10, 15)],
#     'range2': [(0, 2), (4, 8)]
# }
# values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

# result = find_indices_within_ranges(ranges_dict, values)
# print(result)


In [17]:
def update_tuples_in_list(original_list, reference_list):
    """
    Updates the values in the tuples within a list by replacing indices with corresponding values from a reference list.
    
    This function iterates through each tuple in the original list. Each tuple is expected to contain indices.
    These indices are used to fetch corresponding values from the reference list, creating new tuples.
    
    Parameters:
        original_list (list): List of tuples. Each inner tuple consists of indices into the reference_list.
        reference_list (list): List of elements that are referenced by the indices in the tuples of the original_list.
        
    Returns:
        list: A list with the same structure as original_list but with tuples transformed to contain elements from reference_list
              based on the indices in the original tuples.
    """
    # Create a new list to store the updated tuples
    new_list = [
        tuple(reference_list[idx] for idx in tup) for tup in original_list
    ]
    
    return new_list

# Example usage:
original_list = [
    (0, 1), (2, 3),
    (1, 3), (0, 2)
]
reference_list = ['alpha', 'beta', 'gamma', 'delta']

updated_list = update_tuples_in_list(original_list, reference_list)
print(updated_list)

[('alpha', 'beta'), ('gamma', 'delta'), ('beta', 'delta'), ('alpha', 'gamma')]


In [18]:
def find_indices_within_ranges(ranges_list, values):
    """
    Finds the indices of values that fall within specified ranges.
    
    Parameters:
        ranges_list (list): A list of tuples representing ranges.
        values (list): A list of values to check against the ranges.
        
    Returns:
        list: A list of sorted indices for values within the ranges.
    """
    matched_indices = []
    for index, value in enumerate(values):
        if any(start <= value <= end for start, end in ranges_list):
            matched_indices.append(index)
    
    return sorted(matched_indices)

# Example usage:
ranges_list = [(1, 5), (10, 15), (0, 2), (4, 8)]
values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

result = find_indices_within_ranges(ranges_list, values)
print(result)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15]


In [19]:
OUTCOME_TO_COLOR = {"both_rewarded": "green", 
"novel_lose": "#e67073",
"novel_win": "#93a5da",
"lose": "#951a1d",
"alone_rewarded": "#0499af",
"win": "#3853a3",
"omission": "orange",
"tie": "green"}

In [20]:
comp_id_to_color = {'competitive_1': "#281640",
'competitive_2': "#43246a",
'competitive_3': "#8e7ca6",
'no_comp_4': "#2f3600",
'no_comp_5': "#535f00",
'no_comp_6': "#768800",
'no_comp_7': "#9fac4d",
'no_comp_8': "#c8cf99",
'competitive': "#43246A",
'no_comp': "#768800",
'win': "#0045A6",
'win_competitive': "#003074",
'win_no_comp': "#4d7dc1",
'lose': "#792910",
'lose_competitive': "#551d0b",
'lose_no_comp': "#a16958",
'rewarded': "#FFAF00"
}

In [21]:
to_keep_columns = ['trial_label',
'tone_start_frame',
'reward_start',
'reward_dispensed',
'tone_stop_frame',
'condition',
'competition_closeness',
'get_reward_frame',
'out_reward_frame',
'notes',
'box_1_port_entry_frames',
'box_2_port_entry_frames',
'video_name',
'tone_start_timestamp',
'tone_stop_timestamp',
'box_1_port_entry_timestamps',
'box_2_port_entry_timestamps',
'current_subject',
'session_dir',
'experiment',
'sleap_name',
'video_id',
'agent',
'all_subjects',
'cohort',
'first_timestamp',
'last_timestamp',
'recording',
'session_path',
'subject',
'baseline_start_timestamp',
'post_trial_end_timestamp',]

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

In [22]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names




In [23]:
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [24]:
# Import the linear regression model class
from pymer4.models import Lm

In [25]:
# Import the lmm model class
from pymer4.models import Lmer



In [26]:
TRIALS_AND_SPECTRAL_DF.head()

Unnamed: 0,tone_start_frame,reward_start,reward_dispensed,tone_stop_frame,condition,competition_closeness,get_reward_frame,out_reward_frame,experiment,video_name,...,cluster_mean_trial_MD_power_all_frequencies_all_windows,cluster_mean_trial_MD_power_gamma,cluster_mean_trial_MD_power_theta,cluster_mean_trial_mPFC_power_all_frequencies_all_windows,cluster_mean_trial_mPFC_power_gamma,cluster_mean_trial_mPFC_power_theta,cluster_mean_trial_vHPC_power_all_frequencies_all_windows,cluster_mean_trial_vHPC_power_gamma,cluster_mean_trial_vHPC_power_theta,cluster_mean_trial_power_timestamps
0,1272,1352,1372.0,1471,rewarded,,,,alone_comp,20240320_142408_alone_comp_subj_3-1_and_3-3.1,...,"[0.0026486786, 0.005430412, 0.0132795125, 0.02...",0.000995,0.022309,"[0.002943836, 0.0073701115, 0.020628551, 0.036...",0.001533,0.019509,"[0.002921776, 0.005711304, 0.010609054, 0.0225...",0.001448,0.03265,1375000.0
1,1272,1352,1372.0,1471,rewarded,,,,alone_comp,20240320_142408_alone_comp_subj_3-1_and_3-3.1,...,"[0.0013180817, 0.0019057549, 0.0038911868, 0.0...",0.000861,0.011037,"[0.0025098089, 0.004177024, 0.0070650093, 0.01...",0.000968,0.015476,"[0.004501448, 0.0047970195, 0.010342834, 0.012...",0.001023,0.018229,1375000.0
2,3268,3348,3368.0,3469,rewarded,,,,alone_comp,20240320_142408_alone_comp_subj_3-1_and_3-3.1,...,"[0.001690788, 0.0043548183, 0.019096818, 0.038...",0.001067,0.023897,"[0.002300576, 0.005869463, 0.024728615, 0.0556...",0.001711,0.024114,"[0.0025493593, 0.008762072, 0.020663612, 0.032...",0.001143,0.026148,3375000.0
3,3268,3348,3368.0,3469,rewarded,,,,alone_comp,20240320_142408_alone_comp_subj_3-1_and_3-3.1,...,"[0.0013261967, 0.00161965, 0.00448946, 0.00852...",0.000872,0.009743,"[0.0018210271, 0.0033454504, 0.009506376, 0.02...",0.001049,0.014004,"[0.0023885863, 0.0030379929, 0.0071357437, 0.0...",0.001021,0.016937,3375000.0
4,4466,4546,4566.0,4667,rewarded,,,,alone_comp,20240320_142408_alone_comp_subj_3-1_and_3-3.1,...,"[0.0016349533, 0.0036012302, 0.0095740575, 0.0...",0.000979,0.022719,"[0.0021478166, 0.003128228, 0.01146675, 0.0657...",0.001512,0.027959,"[0.0042967293, 0.010051728, 0.017485294, 0.032...",0.001223,0.032853,4575000.0


In [27]:
cluster_mean_columns = [col for col in TRIALS_AND_SPECTRAL_DF if "cluster_mean_trial" in col and ("gamma" in col or "theta" in col)]

In [28]:
list(TRIALS_AND_SPECTRAL_DF.columns)

['tone_start_frame',
 'reward_start',
 'reward_dispensed',
 'tone_stop_frame',
 'condition',
 'competition_closeness',
 'get_reward_frame',
 'out_reward_frame',
 'experiment',
 'video_name',
 'tone_start_timestamp',
 'tone_stop_timestamp',
 'cohort',
 'session_dir',
 'tone_frames',
 'box_1_port_entry_frames',
 'box_2_port_entry_frames',
 'notes',
 'session_path',
 'recording',
 'current_subject',
 'subject',
 'all_subjects',
 'first_timestamp',
 'last_timestamp',
 'video_timestamps',
 'tone_timestamps',
 'box_1_port_entry_timestamps',
 'box_2_port_entry_timestamps',
 'power_calculation_frequencies',
 'coherence_calculation_frequencies',
 'granger_calculation_frequencies',
 'tone_start_frame_remove',
 'agent_tail_base',
 'agent_to_reward_port_angle',
 'subject_tail_base',
 'subject_to_reward_port_angle',
 'to_reward_port_angle_diff',
 'to_reward_port_angle_sum',
 'within_trial_index',
 'agent',
 '10s_after_tone_frame',
 '10s_before_tone_frame',
 'all_subjects_remove',
 'body_parts',
 'b

In [29]:
TRIALS_AND_SPECTRAL_DF["trial_and_competitiveness_label"]

0             rewarded
1             rewarded
2             rewarded
3             rewarded
4             rewarded
            ...       
630       lose_no_comp
631    win_competitive
631        win_no_comp
632    tie_competitive
633    tie_competitive
Name: trial_and_competitiveness_label, Length: 1303, dtype: object

In [30]:
TRIALS_AND_SPECTRAL_DF["current_subject"]

0      3.1
1      3.3
2      3.1
3      3.3
4      3.1
      ... 
630    5.2
631    5.3
631    5.3
632    5.2
633    5.3
Name: current_subject, Length: 1303, dtype: object

In [31]:
TRIALS_AND_SPECTRAL_DF = TRIALS_AND_SPECTRAL_DF[~TRIALS_AND_SPECTRAL_DF["trial_and_competitiveness_label"].str.contains("tie")]

In [32]:
TRIALS_AND_SPECTRAL_DF["trial_and_competitiveness_label"].unique()

array(['rewarded', 'win_competitive', 'win_no_comp', 'lose_competitive',
       'lose_no_comp'], dtype=object)

In [47]:
TRIALS_AND_SPECTRAL_DF[["trial_and_competitiveness_label", "current_subject", "recording", "video_name"] + cluster_mean_columns].to_excel("./proc/competitiveness_trials_and_spectral_mean.xlsx")

ModuleNotFoundError: No module named 'openpyxl'

In [43]:
list(TRIALS_AND_SPECTRAL_DF)

['tone_start_frame',
 'reward_start',
 'reward_dispensed',
 'tone_stop_frame',
 'condition',
 'competition_closeness',
 'get_reward_frame',
 'out_reward_frame',
 'experiment',
 'video_name',
 'tone_start_timestamp',
 'tone_stop_timestamp',
 'cohort',
 'session_dir',
 'tone_frames',
 'box_1_port_entry_frames',
 'box_2_port_entry_frames',
 'notes',
 'session_path',
 'recording',
 'current_subject',
 'subject',
 'all_subjects',
 'first_timestamp',
 'last_timestamp',
 'video_timestamps',
 'tone_timestamps',
 'box_1_port_entry_timestamps',
 'box_2_port_entry_timestamps',
 'power_calculation_frequencies',
 'coherence_calculation_frequencies',
 'granger_calculation_frequencies',
 'tone_start_frame_remove',
 'agent_tail_base',
 'agent_to_reward_port_angle',
 'subject_tail_base',
 'subject_to_reward_port_angle',
 'to_reward_port_angle_diff',
 'to_reward_port_angle_sum',
 'within_trial_index',
 'agent',
 '10s_after_tone_frame',
 '10s_before_tone_frame',
 'all_subjects_remove',
 'body_parts',
 'b

In [33]:
raise ValueError()

ValueError: 

In [34]:
from pymer4.utils import get_resource_path
from pymer4.models import Lmer

# IV3 is a categorical predictors with 3 levels in the sample data
df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))

# # We're going to fit a multi-level regression using the
# categorical predictor (IV3) which has 3 levels
model = Lmer("DV ~ IV3 + (1|Group)", data=df)

# Using dummy-coding; suppress summary output
model.fit(factors={"IV3": ["1.0", "0.5", "1.5"]}, summarize=False)

# Get ANOVA table
print(model.anova())

SS Type III Analysis of Variance Table with Satterthwaite approximated degrees of freedom:
(NOTE: Using original model contrasts, orthogonality not guaranteed)
              SS           MS  NumDF     DenomDF    F-stat     P-val Sig
IV3  2359.778135  1179.889067      2  515.000001  5.296284  0.005287  **


In [36]:
df["IV3"].unique()

array([0.5, 1. , 1.5])

In [40]:
df

Unnamed: 0,Group,IV1,DV_l,DV,IV2,IV3
0,1,20.0,0,7.936508,4.563492,0.5
1,1,20.0,0,15.277778,0.000000,1.0
2,1,20.0,1,0.000000,0.000000,1.5
3,1,20.0,1,9.523810,0.000000,0.5
4,1,12.5,0,0.000000,0.000000,1.0
...,...,...,...,...,...,...
559,53,8.0,0,48.412698,13.492063,1.0
560,53,5.0,1,50.000000,70.039683,1.5
561,53,12.5,0,50.000000,69.047619,0.5
562,53,5.0,0,50.000000,71.626984,1.0


In [39]:
col_to_model = {}
for col in cluster_mean_columns:
    # Initialize model instance using 1 predictor with random intercepts and slopes
    model = Lmer("{} ~ trial_and_competitiveness_label + (1 | current_subject)".format(col), data=TRIALS_AND_SPECTRAL_DF)

    

    # Using dummy-coding; suppress summary output
    model.fit(factors={"current_subject": list(TRIALS_AND_SPECTRAL_DF["current_subject"].unique())}, summarize=False)
    
    col_to_model[col] = model
    
    # Get ANOVA table
    print(model.anova())
    break

**NOTE**: Column for 'residuals' not created in model.data, but saved in model.resid only. This is because you have rows with NaNs in your data.

**NOTE** Column for 'fits' not created in model.data, but saved in model.fits only. This is because you have rows with NaNs in your data.

SS Type III Analysis of Variance Table with Satterthwaite approximated degrees of freedom:
(NOTE: Using original model contrasts, orthogonality not guaranteed)
                                       SS        MS  NumDF      DenomDF  \
trial_and_competitiveness_label  0.074824  0.018706      4  1167.160411   

                                   F-stat     P-val  Sig  
trial_and_competitiveness_label  8.282627  0.000001  ***  


In [None]:
"{} ~ trial_and_competitiveness_label + (1 | current_subject)".format(col)

In [None]:
model.fit()