# All oscillation analysis

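Computes multitaper LFP power for each recorded brain region (and pairwise coherence between regions), then averages the power across windows, parses it by velocity bin, and plots it by trial outcome.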

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import os
import collections
import itertools
from collections import defaultdict
from itertools import combinations

In [3]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import mannwhitneyu


In [4]:
import matplotlib
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
import colorsys

In [5]:
from sklearn.metrics import confusion_matrix

In [6]:
from spectral_connectivity import Multitaper, Connectivity
import spectral_connectivity

In [7]:
FONTSIZE = 20

In [8]:
font = {'weight' : 'medium',
        'size'   : 20}

matplotlib.rc('font', **font)

# Functions

In [9]:
def generate_pairs(lst):
    """
    Builds every unordered pair of elements from a list.

    Pairs are emitted in index order: the first element is paired with each
    later element, then the second element with each later element, and so on.

    Parameters:
    - lst (list): The list whose elements are paired up.

    Returns:
    - list: Tuples (lst[i], lst[j]) for every i < j; empty when the list has
      fewer than two elements.
    """
    pairs = []
    count = len(lst)
    for first in range(count - 1):
        for second in range(first + 1, count):
            pairs.append((lst[first], lst[second]))
    return pairs

## Inputs & Data

Explanation of each input and where it comes from.

In [10]:
# Inputs and required data loading.
# Input variable names are in all-caps snake case.
# Whenever an input changes or is used for processing,
# the derived variables are in all-lowercase snake case.
OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs

In [11]:
TIME_HALFBANDWIDTH_PRODUCT = 2
TIME_WINDOW_DURATION = 1
TIME_WINDOW_STEP = 0.5
RESAMPLE_RATE=1000

In [12]:
LFP_TRACES_DF = pd.read_pickle("/blue/npadillacoreano/ryoi360/reward_competition_extention/final_proc/rce_pilot_2_01_lfp_traces_and_frames.pkl")

In [13]:
LFP_TRACES_DF.head()

Unnamed: 0,cohort,session_dir,original_file,tone_frames,box_1_port_entry_frames,box_2_port_entry_frames,video_name,session_path,recording,current_subject,...,video_timestamps,tone_timestamps,box_1_port_entry_timestamps,box_2_port_entry_timestamps,lfp_timestamps,mPFC_lfp_trace,MD_lfp_trace,LH_lfp_trace,BLA_lfp_trace,vHPC_lfp_trace
0,2,20230612_101430_standard_comp_to_training_D1_s...,20230612_101430_standard_comp_to_training_D1_s...,"[[980, 1180], [3376, 3575], [5672, 5871], [746...","[[490, 514], [518, 558], [558, 637], [638, 640...","[[33021, 33027], [33502, 33503], [33504, 33506...",20230612_101430_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_101430_standard_comp_to_training_D1_s...,1.3,...,"[-2, 1384, 2770, 4156, 4156, 5542, 6928, 6928,...","[[982229, 1182226], [3382227, 3582224], [56822...","[[491029, 515227], [519426, 558629], [559427, ...","[[33082200, 33090003], [33565003, 33567000], [...","[0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 2...","[0.933348, 0.8057418, 0.9461086, 1.2687699, 1....","[0.44765243, 0.479771, 0.7427417, 0.97560126, ...","[0.89443207, 0.96188104, 1.1935536, 1.3225864,...","[0.6654362, 0.6609094, 0.87366796, 1.0230516, ...","[0.2327341, 0.3381231, 0.5568053, 0.74650556, ..."
1,2,20230612_101430_standard_comp_to_training_D1_s...,20230612_101430_standard_comp_to_training_D1_s...,"[[980, 1180], [3376, 3575], [5672, 5871], [746...","[[490, 514], [518, 558], [558, 637], [638, 640...","[[33021, 33027], [33502, 33503], [33504, 33506...",20230612_101430_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_101430_standard_comp_to_training_D1_s...,1.4,...,"[-2, 1384, 2770, 4156, 4156, 5542, 6928, 6928,...","[[982229, 1182226], [3382227, 3582224], [56822...","[[491029, 515227], [519426, 558629], [559427, ...","[[33082200, 33090003], [33565003, 33567000], [...","[0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 2...","[0.27034205, 0.4041477, 0.45876226, 0.52429974...","[0.8194214, 0.80269855, 0.71908414, 0.64104396...","[0.8116741, 0.7202179, 0.6230456, 0.64590967, ...","[0.7982271, 1.0675378, 1.1112098, 0.8346204, 1...","[0.882244, 1.2294496, 1.485585, 1.2322956, 1.0..."
2,2,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.1,...,"[1384, 2444, 2769, 4155, 5541, 6708, 6927, 831...","[[1126742, 1326741], [3526740, 3726740], [5826...","[[192745, 249350], [389747, 407142], [917544, ...","[[33037711, 33038706], [33264908, 33270313], [...","[0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 2...","[-0.058899105, 0.19379705, 0.72198904, 1.09058...","[0.052017204, 0.2566182, 0.409202, 0.2288757, ...","[0.0039443844, 0.2524406, 0.48910367, 0.408243...","[0.0017428675, 0.040085953, 0.1289722, 0.19345...","[0.24099746, 0.36528546, 0.31526712, -0.039408..."
3,2,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.2,...,"[1384, 2444, 2769, 4155, 5541, 6708, 6927, 831...","[[1126742, 1326741], [3526740, 3726740], [5826...","[[192745, 249350], [389747, 407142], [917544, ...","[[33037711, 33038706], [33264908, 33270313], [...","[0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 2...","[-0.5686467, -0.88410044, -0.7741067, -0.08716...","[-0.7133093, -1.0918, -0.88557106, -0.07278667...","[-0.38970518, -0.8093877, -0.46165076, 0.39570...","[0.22564748, -0.09320222, 0.16678292, 0.760333...","[1.2494063, 1.0194397, 0.9779509, 1.0810802, 0..."
4,2,20230613_105657_standard_comp_to_training_D2_s...,20230613_105657_standard_comp_to_training_D2_s...,"[[916, 1117], [3312, 3513], [5608, 5808], [740...","[[49, 67], [70, 79], [360, 366], [460, 469], [...","[[33601, 33798], [34108, 34165], [34166, 34179...",20230613_105657_standard_comp_to_training_D2_s...,/scratch/back_up/reward_competition_extention/...,20230613_105657_standard_comp_to_training_D2_s...,1.1,...,"[1384, 1384, 2770, 4156, 4156, 5541, 6927, 831...","[[918755, 1118758], [3318755, 3518757], [56187...","[[49358, 67558], [70155, 79355], [360955, 3671...","[[33624333, 33822933], [34132932, 34190535], [...","[0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 2...","[-0.6503345, -0.6020239, -0.89374536, -1.10928...","[-0.99070936, -0.8981983, -1.0546261, -1.26992...","[-0.94037557, -0.9188701, -1.2414521, -1.48778...","[-1.0461473, -0.8379503, -0.76224226, -0.69685...","[-0.9619772, -0.89879316, -1.0425369, -1.38846..."


# Power Calculation

- Getting the column name of all the traces

In [14]:
trace_columns = [col for col in LFP_TRACES_DF.columns if "lfp_trace" in col]

In [15]:
trace_columns

['mPFC_lfp_trace',
 'MD_lfp_trace',
 'LH_lfp_trace',
 'BLA_lfp_trace',
 'vHPC_lfp_trace']

- Calculating the power at each frequency band

In [17]:
# For every LFP trace column, run a sliding-window multitaper spectral estimate on
# each row's trace and keep two results per region:
#   "<region>_power_calculation_frequencies"      - the frequency axis
#   "<region>_power_all_frequencies_all_windows"  - the power array
for col in trace_columns:
    # Region name is the text before the first underscore (e.g. "mPFC" from "mPFC_lfp_trace")
    brain_region = col.split("_")[0]
    print(brain_region)

    # Define column names
    lfp_trace_col = f"{brain_region}_lfp_trace"
    multitaper_col = f"{brain_region}_power_multitaper"
    connectivity_col = f"{brain_region}_power_connectivity"
    frequencies_col = f"{brain_region}_power_calculation_frequencies"
    power_col = f"{brain_region}_power_all_frequencies_all_windows"
    
    # Build one Multitaper object per row from that row's trace, using the
    # window/taper settings defined in the inputs section
    LFP_TRACES_DF[multitaper_col] = LFP_TRACES_DF[lfp_trace_col].apply(
        lambda x: Multitaper(
            time_series=x, 
            sampling_frequency=RESAMPLE_RATE, 
            time_halfbandwidth_product=TIME_HALFBANDWIDTH_PRODUCT,
            time_window_duration=TIME_WINDOW_DURATION, 
            time_window_step=TIME_WINDOW_STEP
        )
    )

    # Wrap each Multitaper object in a Connectivity object, which exposes
    # .frequencies and .power()
    LFP_TRACES_DF[connectivity_col] = LFP_TRACES_DF[multitaper_col].apply(
        lambda x: Connectivity.from_multitaper(x)
    )

    # Store the frequency axis and the power array for every row
    LFP_TRACES_DF[frequencies_col] = LFP_TRACES_DF[connectivity_col].apply(
        lambda x: x.frequencies
    )
    # squeeze() removes length-1 axes from the power array
    # (the shape check further down shows a 2-D array per row)
    LFP_TRACES_DF[power_col] = LFP_TRACES_DF[connectivity_col].apply(
        lambda x: x.power().squeeze()
    )
    
    # Drop the intermediate Multitaper/Connectivity object columns; only the
    # frequencies and power columns are kept. Note this rebinds LFP_TRACES_DF.
    LFP_TRACES_DF = LFP_TRACES_DF.drop(columns=[multitaper_col, connectivity_col], errors="ignore")

mPFC
MD
LH
BLA
vHPC


- Getting the timestamps of the power

In [18]:
# Keep every (RESAMPLE_RATE // 2)-th LFP timestamp, starting RESAMPLE_RATE // 2 samples in
# and stopping before the last RESAMPLE_RATE // 2 samples. The shape checks below compare
# the resulting length against the number of power windows.
# NOTE(review): the slice hard-codes the 1 s window / 0.5 s step currently set by
# TIME_WINDOW_DURATION and TIME_WINDOW_STEP; presumably each timestamp marks a window
# centre — confirm, and update the slice if those constants change.
LFP_TRACES_DF["power_timestamps"] = LFP_TRACES_DF["lfp_timestamps"].apply(lambda x: x[(RESAMPLE_RATE//2):(-RESAMPLE_RATE//2):(RESAMPLE_RATE//2)])
# .iloc[0][500:-500:500].shape

- Making sure that the timestamps for power make sense in both shape and values

In [19]:
LFP_TRACES_DF["power_timestamps"].head().apply(lambda x: x.shape)

0    (6828,)
1    (6828,)
2    (6833,)
3    (6833,)
4    (6840,)
Name: power_timestamps, dtype: object

In [20]:
LFP_TRACES_DF[[col for col in LFP_TRACES_DF.columns if "power_all_frequencies_all_windows" in col][0]].iloc[0].shape

(6828, 500)

In [26]:
LFP_TRACES_DF[[col for col in LFP_TRACES_DF.columns if "lfp_timestamps" in col][0]].iloc[0]

array([       0,       20,       40, ..., 68293300, 68293320, 68293340],
      dtype=int32)

In [25]:
LFP_TRACES_DF[[col for col in LFP_TRACES_DF.columns if "power_timestamps" in col][0]].iloc[0]

array([   10000,    20000,    30000, ..., 68260000, 68270000, 68280000],
      dtype=int32)

- Checking if the right frequencies are being used

In [22]:
LFP_TRACES_DF[[col for col in LFP_TRACES_DF.columns if "power_calculation_frequencies" in col]].head()

Unnamed: 0,mPFC_power_calculation_frequencies,MD_power_calculation_frequencies,LH_power_calculation_frequencies,BLA_power_calculation_frequencies,vHPC_power_calculation_frequencies
0,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ..."
1,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ..."
2,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ..."
3,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ..."
4,"[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ..."


- Dropping unnecessary columns

In [23]:
LFP_TRACES_DF = LFP_TRACES_DF.drop(columns=[col for col in LFP_TRACES_DF.columns if "power_calculation_frequencies" in col], errors="ignore")

In [27]:
LFP_TRACES_DF.head()

Unnamed: 0,cohort,session_dir,original_file,tone_frames,box_1_port_entry_frames,box_2_port_entry_frames,video_name,session_path,recording,current_subject,...,MD_lfp_trace,LH_lfp_trace,BLA_lfp_trace,vHPC_lfp_trace,mPFC_power_all_frequencies_all_windows,MD_power_all_frequencies_all_windows,LH_power_all_frequencies_all_windows,BLA_power_all_frequencies_all_windows,vHPC_power_all_frequencies_all_windows,power_timestamps
0,2,20230612_101430_standard_comp_to_training_D1_s...,20230612_101430_standard_comp_to_training_D1_s...,"[[980, 1180], [3376, 3575], [5672, 5871], [746...","[[490, 514], [518, 558], [558, 637], [638, 640...","[[33021, 33027], [33502, 33503], [33504, 33506...",20230612_101430_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_101430_standard_comp_to_training_D1_s...,1.3,...,"[0.44765243, 0.479771, 0.7427417, 0.97560126, ...","[0.89443207, 0.96188104, 1.1935536, 1.3225864,...","[0.6654362, 0.6609094, 0.87366796, 1.0230516, ...","[0.2327341, 0.3381231, 0.5568053, 0.74650556, ...","[[0.04169420203133221, 0.0802730627855665, 0.0...","[[0.00632302042850671, 0.01745955943866885, 0....","[[0.01958979591078521, 0.04858419484241715, 0....","[[0.037601236825227174, 0.024253116170145916, ...","[[0.00963684644472439, 0.005739311937810972, 0...","[10000, 20000, 30000, 40000, 50000, 60000, 700..."
1,2,20230612_101430_standard_comp_to_training_D1_s...,20230612_101430_standard_comp_to_training_D1_s...,"[[980, 1180], [3376, 3575], [5672, 5871], [746...","[[490, 514], [518, 558], [558, 637], [638, 640...","[[33021, 33027], [33502, 33503], [33504, 33506...",20230612_101430_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_101430_standard_comp_to_training_D1_s...,1.4,...,"[0.8194214, 0.80269855, 0.71908414, 0.64104396...","[0.8116741, 0.7202179, 0.6230456, 0.64590967, ...","[0.7982271, 1.0675378, 1.1112098, 0.8346204, 1...","[0.882244, 1.2294496, 1.485585, 1.2322956, 1.0...","[[0.00025558452388879546, 0.000589764980425109...","[[0.012226195470614404, 0.012737730544356085, ...","[[0.01642187126767235, 0.017656380769446853, 0...","[[0.0004749562250888264, 0.00213518215904488, ...","[[0.0012590738531862485, 0.0018918990992088727...","[10000, 20000, 30000, 40000, 50000, 60000, 700..."
2,2,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.1,...,"[0.052017204, 0.2566182, 0.409202, 0.2288757, ...","[0.0039443844, 0.2524406, 0.48910367, 0.408243...","[0.0017428675, 0.040085953, 0.1289722, 0.19345...","[0.24099746, 0.36528546, 0.31526712, -0.039408...","[[0.021781332050411613, 0.009884830970954838, ...","[[0.009157902832466413, 0.0061483973893087214,...","[[0.016855146879814254, 0.011329709006153408, ...","[[0.00823432178047688, 0.005799925057989854, 0...","[[0.001064310349200692, 0.00697524514144805, 0...","[10000, 20000, 30000, 40000, 50000, 60000, 700..."
3,2,20230612_112630_standard_comp_to_training_D1_s...,20230612_112630_standard_comp_to_training_D1_s...,"[[1125, 1324], [3519, 3720], [5815, 6014], [76...","[[192, 248], [389, 405], [916, 929], [929, 948...","[[33019, 33020], [33246, 33251], [33253, 33255...",20230612_112630_standard_comp_to_training_D1_s...,/scratch/back_up/reward_competition_extention/...,20230612_112630_standard_comp_to_training_D1_s...,1.2,...,"[-0.7133093, -1.0918, -0.88557106, -0.07278667...","[-0.38970518, -0.8093877, -0.46165076, 0.39570...","[0.22564748, -0.09320222, 0.16678292, 0.760333...","[1.2494063, 1.0194397, 0.9779509, 1.0810802, 0...","[[0.18145923216443083, 0.2057156756448491, 0.2...","[[0.2328621757265994, 0.23294940155800348, 0.3...","[[0.3685718278926778, 0.36854706998129544, 0.4...","[[0.3391210999913441, 0.2897891034771225, 0.37...","[[0.23286783552444634, 0.17396429117361695, 0....","[10000, 20000, 30000, 40000, 50000, 60000, 700..."
4,2,20230613_105657_standard_comp_to_training_D2_s...,20230613_105657_standard_comp_to_training_D2_s...,"[[916, 1117], [3312, 3513], [5608, 5808], [740...","[[49, 67], [70, 79], [360, 366], [460, 469], [...","[[33601, 33798], [34108, 34165], [34166, 34179...",20230613_105657_standard_comp_to_training_D2_s...,/scratch/back_up/reward_competition_extention/...,20230613_105657_standard_comp_to_training_D2_s...,1.1,...,"[-0.99070936, -0.8981983, -1.0546261, -1.26992...","[-0.94037557, -0.9188701, -1.2414521, -1.48778...","[-1.0461473, -0.8379503, -0.76224226, -0.69685...","[-0.9619772, -0.89879316, -1.0425369, -1.38846...","[[0.03205044002405316, 0.023584636883166452, 0...","[[0.07614841881452278, 0.049348754656105644, 0...","[[0.048744325828076675, 0.031626995161513904, ...","[[0.09511804801095025, 0.06073760225092326, 0....","[[0.03449033639067145, 0.020391753288298446, 0...","[10000, 20000, 30000, 40000, 50000, 60000, 700..."


In [None]:
# NOTE(review): unconditional raise — presumably a deliberate stop so that "Run All" does
# not continue into the cells below; confirm and remove once those cells are ready to run.
raise ValueError()

## Coherence Calculation

- Getting the trace column pairs

In [None]:
trace_columns

In [None]:
brain_region_pairs = generate_pairs(sorted(trace_columns))
brain_region_pairs = sorted(brain_region_pairs)


In [None]:
brain_region_pairs

- Getting just the region names

In [None]:
# "<region_1>_<region_2>" label for every trace-column pair, where each region is the
# text before the first underscore of its column name; labels are sorted alphabetically.
all_pair_base_name = sorted(
    "{}_{}".format(region_1.split("_")[0], region_2.split("_")[0])
    for region_1, region_2 in brain_region_pairs
)

In [None]:
all_pair_base_name

## Coherence Calculation

- Calculating the coherence

In [None]:
LFP_TRACES_DF.head()

In [None]:
# For each pair of trace columns, build a two-signal multitaper estimate per row and
# store the pair's frequency axis and coherence magnitude.
# NOTE(review): the `break` at the bottom of the loop body exits after the first pair,
# so only one pair is ever processed — presumably a debugging leftover; confirm before
# relying on coherence columns for the other pairs.
for region_1, region_2 in brain_region_pairs:
    # Label built from the text before the first underscore of each column name
    pair_base_name = "{}_{}".format(region_1.split("_")[0], region_2.split("_")[0])
    print(pair_base_name)
    
    # NOTE(review): any failure is only printed and the loop carries on, so a pair can
    # silently end up without its output columns.
    try:
        multitaper_col = "{}_multitaper".format(pair_base_name)
        # Stack the two traces of the row and transpose so each trace is one column of time_series
        LFP_TRACES_DF[multitaper_col] = LFP_TRACES_DF.apply(lambda x: Multitaper(time_series=np.array([x[region_1],x[region_2]]).T, sampling_frequency=RESAMPLE_RATE, time_halfbandwidth_product=TIME_HALFBANDWIDTH_PRODUCT, time_window_step=TIME_WINDOW_STEP, time_window_duration=TIME_WINDOW_DURATION), axis=1)
    
        connectivity_col = "{}_connectivity".format(pair_base_name)
        LFP_TRACES_DF[connectivity_col] = LFP_TRACES_DF[multitaper_col].apply(lambda x: Connectivity.from_multitaper(x))
        
        LFP_TRACES_DF["{}_frequencies".format(pair_base_name)] = LFP_TRACES_DF[connectivity_col].apply(lambda x: x.frequencies)
    
        # [:, :, 0, 1] keeps the first two axes and picks entry (0, 1) of the last two —
        # presumably the coherence between the first and second signal; confirm axis order
        LFP_TRACES_DF["{}_all_window_coherence".format(pair_base_name)] = LFP_TRACES_DF[connectivity_col].apply(lambda x: x.coherence_magnitude()[:,:,0,1])

    except Exception as e: 
        print(e)
    # Drop the intermediate object columns whether or not the try block succeeded
    LFP_TRACES_DF = LFP_TRACES_DF.drop(columns=[multitaper_col, connectivity_col], errors="ignore")
    break
    

In [None]:
LFP_TRACES_DF.head()
# ["BLA_LH_all_window_coherence"].head()

In [None]:
LFP_TRACES_DF["lfp_timestamps"].iloc[0]

In [None]:
68293340 / 20000

In [None]:
68280000/20000

In [None]:
# NOTE(review): unconditional raise — presumably a deliberate stop so that "Run All" does
# not continue into the cells below; confirm and remove once those cells are ready to run.
raise ValueError()

In [None]:
CHANNEL_MAP_AND_ALL_TRIALS_DF = pd.read_pickle("./proc/full_baseline_and_trial_lfp_traces.pkl")
MERGED_TRIALS_AND_VIDEO = pd.read_pickle("./proc/trial_SLEAP_and_metadata.pkl")

In [None]:
# Plot colour (hex string) for each trial-outcome label.
# The "*_comp" labels reuse the base "lose"/"win" colours; the "*_non_comp"
# labels have their own hex values.
# (An earlier named-colour version of this dict was assigned immediately above
# and overwritten straight away; it has been removed as dead code.)
OUTCOME_TO_COLOR = {
    'lose': "#951a1d",
    'omission': "#af780d",
    'rewarded': "#0499af",
    'win': "#3853a3",
    'lose_comp': "#951a1d",
    'win_comp': "#3853a3",
    'lose_non_comp': "#e67073",
    'win_non_comp': "#93a5da",
}

In [None]:
OUTCOME_COMPARISONS = {"win_lose": ("win", "lose"), "lose_omission": ("lose", "omission"), "win_rewarded": ("win", "rewarded")}

In [None]:
TRIAL_OR_BASELINE_TO_STYLE = {'baseline': "--", "trial": "-"}

In [None]:
TIME_HALFBANDWIDTH_PRODUCT = 2
TIME_WINDOW_DURATION = 1
TIME_WINDOW_STEP = 0.5
RESAMPLE_RATE=1000

In [None]:
BIN_TO_VELOCITY = {0: "0 to 2.5cm/s", 1: "2.5 to 5cm/s", 2: "5 to 10 cm/s", 3: "10cm/s+"}

In [None]:
MIN_VELOCITY = 0
MAX_VELOCITY = 2.5

In [None]:
MIN_DISTANCE = 0
MAX_DISTANCE = 5

In [None]:
# Names of the frequency bands analysed
ALL_BANDS = ["theta", "beta", "gamma"]
# Band name -> (low_freq, high_freq) tuple.
# (A dict-of-dicts version with "low_freq"/"high_freq" keys was assigned here and
# overwritten on the next line; it has been removed as dead code.)
# NOTE(review): beta ends at 31 and gamma starts at 30 — presumably the high value is
# used as an exclusive bound; confirm against the code that consumes this mapping.
BAND_TO_FREQ = {"theta": (6, 11), "beta": (20, 31), "gamma": (30, 51)}

In [None]:
BAND_TO_YLIM = {"theta": 0.065,
"beta": 0.007,
"gamma": 0.005}

In [None]:
BRAIN_REGIONS = ["mPFC",
"vHPC",
"BLA",
"LH",
"MD"]

In [None]:
NUMBER_OF_COMPARISONS = 3

In [None]:
GROUPINGS = "trial_outcome"
# GROUPINGS = "competition_closeness"

In [None]:
FONTSIZE=20

In [None]:
MERGED_TRIALS_AND_VIDEO.head()

In [None]:
CHANNEL_MAP_AND_ALL_TRIALS_DF.head()

## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Functions 

In [None]:
def generate_pairs(lst):
    """
    Generates all unique pairs from a list.

    Uses itertools.combinations (imported at the top of the notebook), so the
    pairs come out in the same index order as a nested i < j loop.

    Parameters:
    - lst (iterable): The items to generate pairs from. Any iterable is
      accepted; a list gives the same result as before.

    Returns:
    - list: A list of tuples, each containing a unique pair from the input;
      empty when the input has fewer than two items.
    """
    return list(combinations(lst, 2))

In [None]:
def nested_dict():
    """
    Creates an empty two-level dictionary.

    Returns:
    - defaultdict: A defaultdict whose missing keys are initialised to an
      empty plain dict, so `d[a][b] = value` works without creating `d[a]` first.
    """
    inner_factory = dict
    return defaultdict(inner_factory)

In [None]:
def mean_of_counter(counter):
    """
    Computes the arithmetic mean of the values held in a Counter.

    Parameters:
    - counter: A Counter (or any mapping exposing .values()) with numeric values.

    Returns:
    - The mean of the values, or 0 when the mapping is empty.
    """
    counts = list(counter.values())
    if not counts:
        # Nothing to average; return 0 rather than dividing by zero
        return 0
    return sum(counts) / len(counts)

In [None]:
def adjust_lightness(color, amount=0.5):
    """
    Scales the lightness of a color in HLS space.

    :param color: Input color in any format that matplotlib's `to_rgb` can handle.
    :param amount: Multiplier applied to the lightness. > 1 for lighter, < 1 for darker.
    :return: The adjusted color as returned by `colorsys.hls_to_rgb`, with the
             scaled lightness clamped to the range [0, 1].
    """
    red_green_blue = mcolors.to_rgb(color)
    hue, lightness, saturation = colorsys.rgb_to_hls(*red_green_blue)
    # Clamp so that large multipliers saturate at full lightness instead of overflowing
    clamped_lightness = max(0, min(1, amount * lightness))
    return colorsys.hls_to_rgb(hue, clamped_lightness, saturation)

In [None]:
def generate_gradient_colors(base_color, num_colors=10):
    """
    Generate a light-to-dark list of colors around a base color.

    The hue and saturation of the base color are kept. Lightness changes in equal
    steps of `base lightness / (num_colors // 2)`: positions before the middle
    index are lighter, the middle index is the base color itself, and positions
    after it are darker. Lightness is clamped to [0, 1].

    :param base_color: The base color to create a gradient from (anything
                       matplotlib's `to_rgb` accepts).
    :param num_colors: The number of gradient colors to generate.
    :return: List of `num_colors` colors as returned by `colorsys.hls_to_rgb`.
    """
    hue, base_lightness, saturation = colorsys.rgb_to_hls(*mcolors.to_rgb(base_color))

    pivot = num_colors // 2
    # Guard against a zero divisor when fewer than two colors are requested
    step = base_lightness / max(pivot, 1)

    gradient = []
    for position in range(num_colors):
        # Signed distance from the pivot: positive -> lighter, 0 -> base, negative -> darker
        lightness = base_lightness + (pivot - position) * step
        # Keep lightness inside the valid range [0, 1]
        lightness = max(0, min(lightness, 1))
        gradient.append(colorsys.hls_to_rgb(hue, lightness, saturation))

    return gradient

In [None]:
def get_significance_stars_from_p_value(p_value, number_of_comparisons=3):
    """
    Converts a p-value into a significance-star label.

    Each threshold is divided by the number of comparisons (Bonferroni
    correction) before the p-value is compared against it.

    :param p_value: The p-value to classify.
    :param number_of_comparisons: Number of comparisons to correct for.
    :return: "***" if p <= 0.001 / n, "**" if p <= 0.01 / n,
             "*" if p <= 0.05 / n, otherwise "".
    """
    # Ordered from strictest to loosest so the first match is the strongest label.
    # The 0.01 tier previously returned "***", which made it indistinguishable
    # from the 0.001 tier.
    thresholds = ((0.001, "***"), (0.01, "**"), (0.05, "*"))
    for alpha, stars in thresholds:
        if p_value <= alpha / number_of_comparisons:
            return stars
    return ""

In [None]:
# Original color
original_color = 'red'  # Can be any color matplotlib understands

# Generate lighter versions
lighter_colors = [adjust_lightness(original_color, amount=1+(0.1*i)) for i in range(12)]

# Plotting to demonstrate the colors
for i, color in enumerate(lighter_colors):
    plt.plot([i-0.5, i+0.5], [1, 1], color=color, linewidth=6)  

plt.ylim(0.9, 1.1)
plt.show()

In [None]:
# Base color
base_color = '#ff5733'  # Reddish color

# Number of colors you want in the gradient
num_colors = 10

# Generate gradient colors
gradient_colors = generate_gradient_colors(base_color, num_colors)

# Set the color cycle to use the gradient colors
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=gradient_colors)

# Test by plotting some data
for i in range(num_colors):
    plt.plot(np.arange(10), np.random.rand(10) + i)

plt.show()

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

In [None]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names




In [None]:
CHANNEL_MAP_AND_ALL_TRIALS_DF.columns

In [None]:
MERGED_TRIALS_AND_VIDEO.columns

### Merging LFP Trace dataframe and SLEAP pose tracking

In [None]:
# Key columns used to join the two dataframes
merge_columns = ["time", "recording_file", "current_subject", "video_number"]
# Find the non-key columns of CHANNEL_MAP_AND_ALL_TRIALS_DF that also exist in
# MERGED_TRIALS_AND_VIDEO and drop them from CHANNEL_MAP_AND_ALL_TRIALS_DF, so the two
# frames share only the merge keys (pandas would otherwise suffix duplicated names with _x/_y)
cols_to_drop = [col for col in CHANNEL_MAP_AND_ALL_TRIALS_DF.columns if col not in merge_columns and col in MERGED_TRIALS_AND_VIDEO.columns]
CHANNEL_MAP_AND_ALL_TRIALS_DF = CHANNEL_MAP_AND_ALL_TRIALS_DF.drop(columns=cols_to_drop)

In [None]:
LFP_TRACES_DF = pd.merge(left=CHANNEL_MAP_AND_ALL_TRIALS_DF, right=MERGED_TRIALS_AND_VIDEO, on=merge_columns, how="right")

In [None]:
LFP_TRACES_DF.head()

In [None]:
trace_columns = [col for col in LFP_TRACES_DF.columns if "trace" in col]

In [None]:
trial_trace_columns = sorted([col for col in LFP_TRACES_DF.columns if "trial_lfp_trace" in col])
baseline_trace_columns = sorted([col for col in LFP_TRACES_DF.columns if "baseline_lfp_trace" in col])

In [None]:
trial_trace_columns

### Averaging the power for all the windows (without any velocity parsing)

In [None]:
power_all_window_columns = [col for col in LFP_TRACES_DF.columns if "power_all-window" in col and "baseline-trial" not in col]

In [None]:
power_all_window_columns

In [None]:
# Add one "<prefix>_power_window-averaged" column per all-window power column
for col in power_all_window_columns:
    # Prefix is the first two underscore-separated tokens of the column name
    brain_region = "_".join(col.split("_")[:2])
    print(brain_region)
    # Mean over axis 0 of each row's power array — presumably the window axis; confirm
    LFP_TRACES_DF["{}_power_window-averaged".format(brain_region)] = LFP_TRACES_DF.apply(lambda x: np.array(x[col]).mean(axis=0), axis=1)

In [None]:
LFP_TRACES_DF["{}_power_window-averaged".format(brain_region)].iloc[0].shape

In [None]:
LFP_TRACES_DF.drop(columns=trace_columns, errors="ignore").to_pickle("./proc/rce_lfp_all-window_power.pkl")


# Parsing by velocity

- Getting a mask for each segment based on velocity

In [None]:
all_bins = set(x for lst in LFP_TRACES_DF["trial_subject_thorax_velocity_binned"] for x in lst)

In [None]:
all_bins

In [None]:
baseline_power_all_window_columns = [col for col in LFP_TRACES_DF if "baseline_power_all-window" in col and "baseline-trial" not in col]
trial_power_window_averaged_columns = [col for col in LFP_TRACES_DF if "trial_power_all-window" in col and "baseline-trial" not in col]

In [None]:
baseline_power_all_window_columns

In [None]:
LFP_TRACES_DF[baseline_power_all_window_columns[0]].iloc[0].shape

In [None]:
LFP_TRACES_DF["baseline_subject_thorax-to-reward-port_distance_binned"]

In [None]:
# Build one copy of the dataframe per velocity bin. In each copy the power arrays are
# averaged over only those entries whose binned thorax velocity equals that bin.
# NOTE(review): the loop variable `bin` shadows the Python builtin and stays bound in the
# notebook namespace after this cell runs — consider renaming it.
all_bin_df = []
for bin in all_bins:
    current_df = LFP_TRACES_DF.copy()
    # Positions in the binned-velocity lists whose value equals the current bin
    current_df["baseline_velocity_binned-mask"] = current_df["baseline_subject_thorax_velocity_binned"].apply(lambda x: [index for index, num in enumerate(x) if num == bin])
    current_df["trial_velocity_binned-mask"] = current_df["trial_subject_thorax_velocity_binned"].apply(lambda x: [index for index, num in enumerate(x) if num == bin])
    
    # Baseline columns: nan-mean (axis 0) of the power entries selected by the baseline mask.
    # An empty mask produces NaN, which np.nanmean reports with a RuntimeWarning.
    for col in baseline_power_all_window_columns:
        brain_region = "_".join(col.split("_")[:2])
        current_df["{}_power_window-averaged-velocity-parsed".format(brain_region)] = current_df.apply(lambda x: np.nanmean(np.array([x[col][i] for i in x["baseline_velocity_binned-mask"]]), axis=0), axis=1)

    # Trial columns: same, using the trial mask. Despite its name, this list holds the
    # "trial_power_all-window" columns.
    for col in trial_power_window_averaged_columns:
        brain_region = "_".join(col.split("_")[:2])
        current_df["{}_power_window-averaged-velocity-parsed".format(brain_region)] = current_df.apply(lambda x: np.nanmean(np.array([x[col][i] for i in x["trial_velocity_binned-mask"]]), axis=0), axis=1)
    
    # Tag every row of this copy with the bin it was computed for
    current_df["velocity_bin"] = bin
    all_bin_df.append(current_df)
        
    
    


In [None]:
VELOCITY_PARSED_LFP_TRACES_DF = pd.concat(all_bin_df).reset_index(drop=True)

## Plotting all the similar velocities together

In [None]:
power_window_averaged_columns = [col for col in VELOCITY_PARSED_LFP_TRACES_DF.columns if "power_window-averaged-velocity-parsed" in col]

- Grouping by trial outcome and velocity

In [None]:
power_window_averaged_columns

In [None]:
grouped_all_trials_df = VELOCITY_PARSED_LFP_TRACES_DF.groupby([GROUPINGS, 'velocity_bin']).agg({k: lambda x: np.vstack([arr for arr in x.tolist() if not np.any(np.isnan(arr))]) for k in power_window_averaged_columns}).reset_index()

In [None]:
grouped_all_trials_df["mPFC_baseline_power_window-averaged-velocity-parsed"].iloc[1].shape

In [None]:
grouped_all_trials_df = pd.melt(grouped_all_trials_df, id_vars =[GROUPINGS, 'velocity_bin'], value_vars =grouped_all_trials_df.drop(columns=[GROUPINGS, "velocity_bin"]).columns, value_name="power", var_name="brain_region") 

In [None]:
grouped_all_trials_df.head()

In [None]:
# Shorten the melted column name to its first two underscore-separated tokens,
# e.g. "mPFC_trial_power_window-averaged-velocity-parsed" -> "mPFC_trial".
# Keeping only the first token (as before) removed the trial/baseline part, so the
# following cell's x.split("_")[1] raised IndexError. Later cells recover the bare
# region name with region.split("_")[0]. Re-running this cell is a no-op.
grouped_all_trials_df["brain_region"] = grouped_all_trials_df["brain_region"].apply(lambda x: "_".join(x.split("_")[:2]).strip())

In [None]:
grouped_all_trials_df["trial_or_baseline"] = grouped_all_trials_df["brain_region"].apply(lambda x: x.split("_")[1].strip())

In [None]:
grouped_all_trials_df = grouped_all_trials_df[grouped_all_trials_df["trial_or_baseline"] == "trial"].reset_index(drop=True)

In [None]:
grouped_all_trials_df["mean_power"] = grouped_all_trials_df["power"].apply(lambda x: np.nanmean(np.vstack(x), axis=0))

In [None]:
grouped_all_trials_df["std_power"] = grouped_all_trials_df["power"].apply(lambda x: np.nanstd(np.vstack(x), axis=0))

In [None]:
grouped_all_trials_df["n_trials"] = grouped_all_trials_df["power"].apply(lambda x: np.sum(~np.isnan(x), axis=0))

In [None]:
grouped_all_trials_df["sem_power"] = grouped_all_trials_df.apply(lambda x: x["std_power"] / np.sqrt(x["n_trials"]), axis=1)

In [None]:
grouped_all_trials_df[GROUPINGS].unique()

In [None]:
grouped_all_trials_df

## Drawing all velocity bins together for each trial outcome

In [None]:
grouped_all_trials_df.head()

In [None]:
ALL_FREQUENCIES = [int(num) for num in VELOCITY_PARSED_LFP_TRACES_DF[[col for col in VELOCITY_PARSED_LFP_TRACES_DF if "frequencies" in col][0]].iloc[0]]

In [None]:
# One figure per (brain region, outcome): mean power +/- SEM against frequency, with one
# line per velocity bin drawn in a shade of the outcome's colour. Each figure is saved as a PNG.
# NOTE(review): low_freq/high_freq are used both as axis limits and as slice indices into
# ALL_FREQUENCIES and the power arrays, which presumably relies on a 1 Hz frequency
# resolution starting at 0 — confirm.
# NOTE(review): savefig assumes ./proc/velocity_parsed_power already exists, and the figures
# are never closed.
low_freq = 0
high_freq = 13
for region in grouped_all_trials_df["brain_region"].unique():
    region_df = grouped_all_trials_df[grouped_all_trials_df["brain_region"] == region]

    for outcome in region_df[GROUPINGS].unique():
        fig, ax = plt.subplots()
        plt.title("{} during {} trials".format(region.split("_")[0], outcome), fontsize=30)
        plt.xlabel("Frequency", fontsize=20)
        plt.ylabel("Power", fontsize=20)
        plt.xlim(low_freq, high_freq) 
        # plt.yscale("log")
        plt.ylim(0, 0.1)
        outcome_df = region_df[(region_df[GROUPINGS] == outcome)]
        # 20 shades of the outcome colour; bins pick entries -4, -8, -12, ... below
        gradient_colors = generate_gradient_colors(OUTCOME_TO_COLOR[outcome], 20)
        
        for index, row in outcome_df.iterrows():           
            current_frequencies = ALL_FREQUENCIES[low_freq: high_freq]
            mean_power = row["mean_power"][low_freq: high_freq]
            sem_power = row["sem_power"][low_freq: high_freq]
                
            
            # NOTE(review): any plotting error (e.g. an unknown velocity_bin key) is only
            # printed and that line is skipped, so a figure can silently miss bins.
            try:
                ax = sns.lineplot(x=current_frequencies, y=mean_power, \
                label="{}".format(BIN_TO_VELOCITY[row["velocity_bin"]]), linewidth=3, color=gradient_colors[-(row["velocity_bin"]+1) *4])
                
                plt.fill_between(current_frequencies, 
                mean_power - sem_power, mean_power + sem_power, \
                alpha=0.1, color=gradient_colors[-(row["velocity_bin"]+1) *4])
            
            except Exception as e: 
                print(e)
                continue
                
        plt.legend()
        plt.tight_layout()
        plt.savefig("./proc/velocity_parsed_power/rce_velocity_parsed_lfp_power_freq_{}_to_{}_region_{}_{}.png".format(low_freq, high_freq, region.split("_")[0], outcome))

In [None]:
low_freq = 12
high_freq = 31
# Create the figure directory up front so savefig cannot fail on a fresh checkout.
os.makedirs("./proc/velocity_parsed_power", exist_ok=True)
# Every curve shares the same frequency axis, so slice it once.
current_frequencies = ALL_FREQUENCIES[low_freq: high_freq]

# One figure per (brain region, group); each row of the group (one velocity bin)
# is drawn as a mean-power line with a +/- SEM band.
for region in grouped_all_trials_df["brain_region"].unique():
    region_df = grouped_all_trials_df[grouped_all_trials_df["brain_region"] == region]

    for outcome in region_df[GROUPINGS].unique():
        fig, ax = plt.subplots()
        ax.set_title("Z-Scored LFP Power in {} for {}".format(region.split("_")[0], outcome))
        ax.set_xlabel("Frequency")
        ax.set_ylabel("Power")
        ax.set_xlim(low_freq, high_freq)
        # ax.set_yscale("log")  # optional log-scaled power axis
        ax.set_ylim(0, 0.015)

        outcome_df = region_df[(region_df[GROUPINGS] == outcome)]
        # Colour ramp for this outcome; presumably 20 shades - see generate_gradient_colors.
        gradient_colors = generate_gradient_colors(OUTCOME_TO_COLOR[outcome], 20)

        for index, row in outcome_df.iterrows():
            mean_power = row["mean_power"][low_freq: high_freq]
            sem_power = row["sem_power"][low_freq: high_freq]

            try:
                # Velocity bin b uses the shade 4 * (b + 1) steps from the end of the ramp.
                bin_color = gradient_colors[-(row["velocity_bin"] + 1) * 4]
                sns.lineplot(x=current_frequencies, y=mean_power,
                             label="{} {}".format(outcome, BIN_TO_VELOCITY[row["velocity_bin"]]),
                             linewidth=3, color=bin_color, ax=ax)
                ax.fill_between(current_frequencies,
                                mean_power - sem_power, mean_power + sem_power,
                                alpha=0.1, color=bin_color)
            except Exception as e:
                # Best effort: report the row that could not be drawn and keep going.
                print(e)

        ax.legend()
        fig.savefig("./proc/velocity_parsed_power/rce_velocity_parsed_lfp_power_freq_{}_to_{}_region_{}_{}.png".format(low_freq, high_freq, region.split("_")[0], outcome))

In [None]:
low_freq = 30
high_freq = 91
# Create the figure directory up front so savefig cannot fail on a fresh checkout.
os.makedirs("./proc/velocity_parsed_power", exist_ok=True)
# Every curve shares the same frequency axis, so slice it once.
current_frequencies = ALL_FREQUENCIES[low_freq: high_freq]

# One figure per (brain region, group); each row of the group (one velocity bin)
# is drawn as a mean-power line with a +/- SEM band.
for region in grouped_all_trials_df["brain_region"].unique():
    region_df = grouped_all_trials_df[grouped_all_trials_df["brain_region"] == region]

    for outcome in region_df[GROUPINGS].unique():
        fig, ax = plt.subplots()
        ax.set_title("Z-Scored LFP Power in {} for {}".format(region.split("_")[0], outcome))
        ax.set_xlabel("Frequency")
        ax.set_ylabel("Power")
        ax.set_xlim(low_freq, high_freq)
        # ax.set_yscale("log")  # optional log-scaled power axis
        ax.set_ylim(0, 0.004)
        outcome_df = region_df[(region_df[GROUPINGS] == outcome)]
        # Colour ramp for this outcome; presumably 20 shades - see generate_gradient_colors.
        gradient_colors = generate_gradient_colors(OUTCOME_TO_COLOR[outcome], 20)

        for index, row in outcome_df.iterrows():
            mean_power = row["mean_power"][low_freq: high_freq]
            sem_power = row["sem_power"][low_freq: high_freq]

            try:
                # Velocity bin b uses the shade 4 * (b + 1) steps from the end of the ramp.
                bin_color = gradient_colors[-(row["velocity_bin"] + 1) * 4]
                sns.lineplot(x=current_frequencies, y=mean_power,
                             label="{} {}".format(outcome, BIN_TO_VELOCITY[row["velocity_bin"]]),
                             linewidth=3, color=bin_color, ax=ax)
                ax.fill_between(current_frequencies,
                                mean_power - sem_power, mean_power + sem_power,
                                alpha=0.1, color=bin_color)
            except Exception as e:
                # Best effort: report the row that could not be drawn and keep going.
                print(e)

        ax.legend()
        fig.savefig("./proc/velocity_parsed_power/rce_velocity_parsed_lfp_power_freq_{}_to_{}_region_{}_{}.png".format(low_freq, high_freq, region.split("_")[0], outcome))

# Plotting by velocity

In [None]:
grouped_all_trials_df = VELOCITY_PARSED_LFP_TRACES_DF.groupby(['velocity_bin']).agg({k: lambda x: np.vstack([arr for arr in x.tolist() if not np.any(np.isnan(arr))]) for k in power_window_averaged_columns}).reset_index()

In [None]:
grouped_all_trials_df = pd.melt(grouped_all_trials_df, id_vars =['velocity_bin'], value_vars =grouped_all_trials_df.drop(columns=["velocity_bin"]).columns, value_name="power", var_name="brain_region") 

In [None]:
grouped_all_trials_df.head()

In [None]:
grouped_all_trials_df["brain_region"] = grouped_all_trials_df["brain_region"].apply(lambda x: x.split("_")[0].strip())

In [None]:
grouped_all_trials_df["trial_or_baseline"] = grouped_all_trials_df["brain_region"].apply(lambda x: x.split("_")[1].strip())

In [None]:
grouped_all_trials_df["mean_power"] = grouped_all_trials_df["power"].apply(lambda x: np.nanmean(np.vstack(x), axis=0))

In [None]:
grouped_all_trials_df["std_power"] = grouped_all_trials_df["power"].apply(lambda x: np.nanstd(np.vstack(x), axis=0))

In [None]:
grouped_all_trials_df["n_trials"] = grouped_all_trials_df["power"].apply(lambda x: np.sum(~np.isnan(x), axis=0))

In [None]:
grouped_all_trials_df["sem_power"] = grouped_all_trials_df.apply(lambda x: x["std_power"] / np.sqrt(x["n_trials"]), axis=1)

In [None]:
grouped_all_trials_df.head()

# Drawing all velocity bins together for each trial outcome

In [None]:
OUTCOME_TO_COLOR

In [None]:
grouped_all_trials_df.head()

In [None]:
low_freq = 0
high_freq = 13
# Create the figure directory up front so savefig cannot fail on a fresh checkout.
os.makedirs("./proc/velocity_parsed_power", exist_ok=True)
# Every curve shares the same frequency axis, so slice it once.
current_frequencies = ALL_FREQUENCIES[low_freq: high_freq]

# One figure per brain region (trial rows only); each row (one velocity bin) is
# drawn as a mean-power line with a +/- SEM band.
for region in grouped_all_trials_df["brain_region"].unique():
    region_df = grouped_all_trials_df[(grouped_all_trials_df["brain_region"] == region) & (grouped_all_trials_df["trial_or_baseline"] == "trial")]

    fig, ax = plt.subplots()
    ax.set_title("{} ".format(region.split("_")[0]), fontsize=30)
    ax.set_xlabel("Frequency", fontsize=30)
    ax.set_ylabel("Power", fontsize=30)
    ax.set_xlim(low_freq, high_freq)
    # ax.set_yscale("log")  # optional log-scaled power axis

    # Red colour ramp; presumably 20 shades - see generate_gradient_colors.
    gradient_colors = generate_gradient_colors("red", 20)

    for index, row in region_df.iterrows():
        mean_power = row["mean_power"][low_freq: high_freq]
        sem_power = row["sem_power"][low_freq: high_freq]

        try:
            # Velocity bin b uses the shade 4 * (b + 1) steps from the end of the ramp.
            bin_color = gradient_colors[-(row["velocity_bin"] + 1) * 4]
            sns.lineplot(x=current_frequencies, y=mean_power,
                         label="{}".format(BIN_TO_VELOCITY[row["velocity_bin"]]),
                         linewidth=5, color=bin_color, ax=ax)
            ax.fill_between(current_frequencies,
                            mean_power - sem_power, mean_power + sem_power,
                            alpha=0.1, color=bin_color)
        except Exception as e:
            # Best effort: report the row that could not be drawn and keep going.
            print(e)

    fig.tight_layout()
    ax.legend(ncol=2)
    fig.savefig("./proc/velocity_parsed_power/rce_velocity_parsed_lfp_power_freq_{}_to_{}_region_{}.png".format(low_freq, high_freq, region.split("_")[0]))

In [None]:
low_freq = 12
high_freq = 31
# Create the figure directory up front so savefig cannot fail on a fresh checkout.
os.makedirs("./proc/velocity_parsed_power", exist_ok=True)
# Every curve shares the same frequency axis, so slice it once.
current_frequencies = ALL_FREQUENCIES[low_freq: high_freq]

# One figure per brain region (trial rows only); each row (one velocity bin) is
# drawn as a mean-power line with a +/- SEM band.
for region in grouped_all_trials_df["brain_region"].unique():
    region_df = grouped_all_trials_df[(grouped_all_trials_df["brain_region"] == region) & (grouped_all_trials_df["trial_or_baseline"] == "trial")]

    fig, ax = plt.subplots()
    ax.set_title("Z-Scored LFP Power in {} ".format(region.split("_")[0]), fontsize=30)
    ax.set_xlabel("Frequency", fontsize=30)
    ax.set_ylabel("Power", fontsize=30)
    ax.set_xlim(low_freq, high_freq)
    # ax.set_yscale("log")  # optional log-scaled power axis

    # Red colour ramp; presumably 20 shades - see generate_gradient_colors.
    gradient_colors = generate_gradient_colors("red", 20)

    for index, row in region_df.iterrows():
        mean_power = row["mean_power"][low_freq: high_freq]
        sem_power = row["sem_power"][low_freq: high_freq]

        try:
            # Velocity bin b uses the shade 4 * (b + 1) steps from the end of the ramp.
            bin_color = gradient_colors[-(row["velocity_bin"] + 1) * 4]
            sns.lineplot(x=current_frequencies, y=mean_power,
                         label="{}".format(BIN_TO_VELOCITY[row["velocity_bin"]]),
                         linewidth=5, color=bin_color, ax=ax)
            ax.fill_between(current_frequencies,
                            mean_power - sem_power, mean_power + sem_power,
                            alpha=0.1, color=bin_color)
        except Exception as e:
            # Best effort: report the row that could not be drawn and keep going.
            print(e)

    ax.legend()
    fig.savefig("./proc/velocity_parsed_power/rce_velocity_parsed_lfp_power_freq_{}_to_{}_region_{}.png".format(low_freq, high_freq, region.split("_")[0]))

In [None]:
low_freq = 30
high_freq = 90
# Create the figure directory up front so savefig cannot fail on a fresh checkout.
os.makedirs("./proc/velocity_parsed_power", exist_ok=True)
# Every curve shares the same frequency axis, so slice it once.
current_frequencies = ALL_FREQUENCIES[low_freq: high_freq]

# One figure per brain region (trial rows only); each row (one velocity bin) is
# drawn as a mean-power line with a +/- SEM band.
for region in grouped_all_trials_df["brain_region"].unique():
    region_df = grouped_all_trials_df[(grouped_all_trials_df["brain_region"] == region) & (grouped_all_trials_df["trial_or_baseline"] == "trial")]

    fig, ax = plt.subplots()
    ax.set_title("Z-Scored LFP Power in {} ".format(region.split("_")[0]))
    ax.set_xlabel("Frequency")
    ax.set_ylabel("Power")
    ax.set_xlim(low_freq, high_freq)
    # ax.set_yscale("log")  # optional log-scaled power axis

    # Red colour ramp; presumably 20 shades - see generate_gradient_colors.
    gradient_colors = generate_gradient_colors("red", 20)

    for index, row in region_df.iterrows():
        mean_power = row["mean_power"][low_freq: high_freq]
        sem_power = row["sem_power"][low_freq: high_freq]

        try:
            # Velocity bin b uses the shade 4 * (b + 1) steps from the end of the ramp.
            bin_color = gradient_colors[-(row["velocity_bin"] + 1) * 4]
            sns.lineplot(x=current_frequencies, y=mean_power,
                         label="{}".format(BIN_TO_VELOCITY[row["velocity_bin"]]),
                         linewidth=5, color=bin_color, ax=ax)
            ax.fill_between(current_frequencies,
                            mean_power - sem_power, mean_power + sem_power,
                            alpha=0.1, color=bin_color)
        except Exception as e:
            # Best effort: report the row that could not be drawn and keep going.
            print(e)

    ax.legend()
    fig.savefig("./proc/velocity_parsed_power/rce_velocity_parsed_lfp_power_freq_{}_to_{}_region_{}.png".format(low_freq, high_freq, region.split("_")[0]))

# Parse power by lower velocity

In [None]:
LFP_TRACES_DF.head()

In [None]:

def _windows_within_velocity_range(velocities):
    """Return the positions whose value lies in [MIN_VELOCITY, MAX_VELOCITY] (inclusive)."""
    return [index for index, num in enumerate(velocities) if MIN_VELOCITY <= num <= MAX_VELOCITY]


def _mean_power_of_selected_windows(row, power_column, mask_column):
    """NaN-aware mean (axis 0) over the entries of row[power_column] picked by row[mask_column]."""
    selected_windows = np.array([row[power_column][i] for i in row[mask_column]])
    return np.nanmean(selected_windows, axis=0)


# Positions of the chunks whose thorax velocity falls inside the accepted range.
LFP_TRACES_DF["trial_velocity_binned-mask"] = LFP_TRACES_DF["trial_subject_thorax_velocity_chunked"].apply(_windows_within_velocity_range)

LFP_TRACES_DF["baseline_velocity_binned-mask"] = LFP_TRACES_DF["baseline_subject_thorax_velocity_chunked"].apply(_windows_within_velocity_range)

# Output columns are named after the first two "_" tokens of the source column.
for col in baseline_power_all_window_columns:
    brain_region = "_".join(col.split("_")[:2])
    LFP_TRACES_DF["{}_power_window-averaged-velocity-parsed".format(brain_region)] = LFP_TRACES_DF.apply(
        _mean_power_of_selected_windows, axis=1, args=(col, "baseline_velocity_binned-mask"))

# NOTE(review): the baseline loop reads "all_window" columns while this loop reads
# "window_averaged" columns yet indexes them per window the same way - confirm the
# trial column list really holds per-window power.
for col in trial_power_window_averaged_columns:
    brain_region = "_".join(col.split("_")[:2])
    LFP_TRACES_DF["{}_power_window-averaged-velocity-parsed".format(brain_region)] = LFP_TRACES_DF.apply(
        _mean_power_of_selected_windows, axis=1, args=(col, "trial_velocity_binned-mask"))



In [None]:
for col in LFP_TRACES_DF.columns:
    print(col)

In [None]:
VELOCITY_PARSED_LFP_TRACES_DF[GROUPINGS]

In [None]:
grouped_all_trials_df = VELOCITY_PARSED_LFP_TRACES_DF.groupby([GROUPINGS]).agg({k: lambda x: np.vstack([arr for arr in x.tolist() if not np.any(np.isnan(arr))]) for k in power_window_averaged_columns}).reset_index()

In [None]:
grouped_all_trials_df = pd.melt(grouped_all_trials_df, id_vars =[GROUPINGS], value_vars =grouped_all_trials_df.drop(columns=[GROUPINGS]).columns, value_name="power", var_name="brain_region") 

In [None]:
grouped_all_trials_df.head()

In [None]:
grouped_all_trials_df["brain_region"] = grouped_all_trials_df["brain_region"].apply(lambda x: x.split("_")[0].strip())

In [None]:
grouped_all_trials_df["trial_or_baseline"] = grouped_all_trials_df["brain_region"].apply(lambda x: x.split("_")[1].strip())

In [None]:
grouped_all_trials_df["mean_power"] = grouped_all_trials_df["power"].apply(lambda x: np.nanmean(np.vstack(x), axis=0))

In [None]:
grouped_all_trials_df["std_power"] = grouped_all_trials_df["power"].apply(lambda x: np.nanstd(np.vstack(x), axis=0))

In [None]:
grouped_all_trials_df["n_trials"] = grouped_all_trials_df["power"].apply(lambda x: np.sum(~np.isnan(x), axis=0))

In [None]:
grouped_all_trials_df["sem_power"] = grouped_all_trials_df.apply(lambda x: x["std_power"] / np.sqrt(x["n_trials"]), axis=1)

In [None]:
grouped_all_trials_df

In [None]:
OUTCOME_TO_COLOR

In [None]:
FONTSIZE=20

In [None]:
# Matplotlib default font for the figures drawn from here on (medium weight, size 15).
font = dict(weight='medium', size=15)

matplotlib.rc('font', **font)

In [None]:
BAND_TO_FREQ

In [None]:
# Frequency ranges shaded on the power-spectra figure and the colour of each shade.
# A beta entry ((20, 30), "blue") was previously defined here and immediately
# overwritten; only theta and gamma are shaded, so only they are defined.
BAND_TO_FREQ_PLOT = {'theta': (6, 10), 'gamma': (30, 50)}
BAND_TO_FREQ_COLOR = {'theta': "red", 'gamma': "green"}

In [None]:
low_freq = 0
high_freq = 51
# Create the figure directory up front so savefig cannot fail on a fresh checkout.
os.makedirs("./proc/velocity_parsed_power/power_spectra_all_conditions", exist_ok=True)
# Every curve shares the same frequency axis, so slice it once.
current_frequencies = ALL_FREQUENCIES[low_freq: high_freq]

# One log-scaled power-spectrum figure per brain region (trial rows only), with one
# mean +/- SEM curve per group and the plotted frequency bands shaded.
for region in grouped_all_trials_df["brain_region"].unique():
    region_df = grouped_all_trials_df[(grouped_all_trials_df["brain_region"] == region) & (grouped_all_trials_df["trial_or_baseline"] == "trial")]

    fig, ax = plt.subplots()
    ax.set_title("Power Spectra {} ".format(region.split("_")[0]), fontsize=FONTSIZE)
    ax.set_xlabel("Frequency (Hz)", fontsize=FONTSIZE)
    ax.set_ylabel("Power (a.u.)", fontsize=FONTSIZE)
    ax.set_xlim(low_freq, high_freq)
    ax.set_yscale("log")

    for index, row in region_df.iterrows():
        mean_power = row["mean_power"][low_freq: high_freq]
        sem_power = row["sem_power"][low_freq: high_freq]

        try:
            group_color = OUTCOME_TO_COLOR[row[GROUPINGS]]
            sns.lineplot(x=current_frequencies, y=mean_power,
                         label="{}".format(row[GROUPINGS]), linewidth=5,
                         color=group_color, ax=ax)
            ax.fill_between(current_frequencies,
                            mean_power - sem_power, mean_power + sem_power,
                            alpha=0.1, color=group_color)
        except Exception as e:
            # Best effort: report the row that could not be drawn and keep going.
            print(e)

    # Shade each plotted frequency band and add it to the legend.
    for band, (min_freq, max_freq) in BAND_TO_FREQ_PLOT.items():
        ax.axvspan(min_freq, max_freq, facecolor=BAND_TO_FREQ_COLOR[band], alpha=0.1, label=band)
    ax.legend(fontsize=15, ncol=2)

    fig.tight_layout()
    # Save a raster (PNG) and a vector (EPS) copy of the same figure.
    fig.savefig("./proc/velocity_parsed_power/power_spectra_all_conditions/rce_velocity_parsed_lfp_power_freq_{}_to_{}_region_{}.png".format(low_freq, high_freq, region.split("_")[0]))
    fig.savefig("./proc/velocity_parsed_power/power_spectra_all_conditions/rce_velocity_parsed_lfp_power_freq_{}_to_{}_region_{}.eps".format(low_freq, high_freq, region.split("_")[0]))

In [None]:
LFP_TRACES_DF.to_pickle("./proc/rce_sleap_and_power.pkl")

In [None]:
for col in LFP_TRACES_DF:
    print(col)

# Filtering for each frequency band

In [None]:
power_window_averaged_velocity_parsed_columns = [col for col in LFP_TRACES_DF if "trial_power_window-averaged-velocity-parsed" in col]

In [None]:
LFP_TRACES_DF = LFP_TRACES_DF.dropna(subset=power_window_averaged_velocity_parsed_columns).reset_index(drop=True)

In [None]:
LFP_TRACES_DF.head()

In [None]:
# Average each velocity-parsed trial power spectrum inside every frequency band.
# NOTE(review): the band limits are used directly as array positions, which
# presumably relies on the spectrum holding one value per 1 Hz from 0 Hz - confirm.
for power_column in [col for col in LFP_TRACES_DF if "trial_power_window-averaged-velocity-parsed" in col]:
    region = power_column.split("_")[0]

    # Band-specific loop names keep this cell from overwriting the notebook-level
    # low_freq / high_freq that the plotting cells assign and read.
    for band, (band_low_freq, band_high_freq) in BAND_TO_FREQ.items():
        region_band_column = "{}_{}_band-power_window-averaged-velocity-parsed".format(region, band)
        # Default arguments bind the current band limits to the lambda explicitly.
        LFP_TRACES_DF[region_band_column] = LFP_TRACES_DF[power_column].apply(
            lambda x, start=band_low_freq, stop=band_high_freq: np.mean(x[start:stop]))

In [None]:
LFP_TRACES_DF.head()

In [None]:
# Spike-interface column names, one per recorded brain region.
BRAIN_REGIONS = [
    "spike_interface_mPFC",
    "spike_interface_vHPC",
    "spike_interface_BLA",
    "spike_interface_LH",
    "spike_interface_MD",
]
# List every column of the frame whose name mentions "spike_interface".
for column in (name for name in LFP_TRACES_DF if "spike_interface" in name):
    print(column)

In [None]:
# Names of the per-band power columns.
band_power_window_averaged_velocity_parsed_columns = [
    column_name
    for column_name in LFP_TRACES_DF.columns
    if "band-power_window-averaged-velocity-parsed" in column_name
]

In [None]:
band_power_window_averaged_velocity_parsed_columns

In [None]:
# Two-sided Mann-Whitney U test of every band-power column between each pair of groups.
region_and_band_to_mannwhitneyu = []
for col in band_power_window_averaged_velocity_parsed_columns:
    # Column names start with "<region>_<band>_".
    name_tokens = col.split("_")
    brain_region = name_tokens[0]
    band = name_tokens[1]
    outcome_pairs = combinations(sorted(LFP_TRACES_DF[GROUPINGS].unique()), 2)
    for first_outcome, second_outcome in outcome_pairs:
        first_outcome_df = LFP_TRACES_DF[LFP_TRACES_DF[GROUPINGS] == first_outcome]
        second_outcome_df = LFP_TRACES_DF[LFP_TRACES_DF[GROUPINGS] == second_outcome]
        statistic, p_value = mannwhitneyu(
            first_outcome_df[col],
            second_outcome_df[col],
            alternative='two-sided',
        )
        comparison_record = {
            "brain_region": brain_region,
            "band": band,
            "trial_outcome": (first_outcome, second_outcome),
            "statistic": statistic,
            "p_value": p_value,
        }
        region_and_band_to_mannwhitneyu.append(comparison_record)

In [None]:
region_and_band_to_mannwhitneyu = pd.DataFrame(region_and_band_to_mannwhitneyu)

In [None]:
region_and_band_to_mannwhitneyu["significance"] = region_and_band_to_mannwhitneyu["p_value"].apply(lambda x: get_significance_stars_from_p_value(x))

In [None]:
region_and_band_to_mannwhitneyu = region_and_band_to_mannwhitneyu.sort_values(["band", "brain_region", "p_value"]).reset_index(drop=True)

In [None]:
region_and_band_to_mannwhitneyu

In [None]:
region_and_band_to_mannwhitneyu.to_csv("./proc/velocity_parsed_power/power_region_and_band_to_mannwhitneyu.csv")

In [None]:
# Raising here stops a "Run All" at this cell, so the cells below only run when started by hand.
# NOTE(review): presumably a deliberate stop - confirm that the bar/box plot cells below are meant to be skipped.
raise ValueError()

In [None]:
# Per-group mean and standard error of every band-power column.
band_power_by_group = LFP_TRACES_DF.groupby(GROUPINGS)[band_power_window_averaged_velocity_parsed_columns]
mean_power_df = band_power_by_group.mean()
sem_power_df = band_power_by_group.sem()


In [None]:
sem_power_df

In [None]:
# Hand-computed SEM (std / sqrt(number of rows)) of mPFC theta power for "lose" trials.
lose_trials_df = LFP_TRACES_DF[LFP_TRACES_DF["trial_outcome"] == "lose"]
lose_trials_df["mPFC_theta_band-power_window-averaged-velocity-parsed"].std() / len(lose_trials_df) ** 0.5

In [None]:


# Create the figure directory up front so savefig cannot fail on a fresh checkout.
os.makedirs("./proc/velocity_parsed_power/bar_plots", exist_ok=True)

# One bar chart per band-power column: a bar per group (mean) with an SEM error bar.
for column in band_power_window_averaged_velocity_parsed_columns:
    fig, ax = plt.subplots(figsize=(23, 12))

    # Column names start with "<region>_<band>_".
    brain_region = column.split("_")[0]
    band = column.split("_")[1]
    ax.set_title("Power of {} {} Band".format(brain_region, band.capitalize()), fontsize=80)
    ax.set_ylabel("Power (a.u.)", fontsize=80)
    ax.bar(mean_power_df.index, mean_power_df[column], color=mean_power_df.index.map(OUTCOME_TO_COLOR))

    # Error bars only (no connecting line) on top of the bars.
    ax.errorbar(mean_power_df.index, mean_power_df[column],
        yerr=sem_power_df[column],
        color='k',
        capsize=30,
        linestyle='None',
        elinewidth=7,
        capthick=7)

    # Shared y-range per band so the regions are visually comparable.
    ax.set_ylim(0, BAND_TO_YLIM[band])
    plt.xticks(fontsize=60)
    plt.yticks(fontsize=60)
    ax.locator_params(axis='y', nbins=4)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    fig.tight_layout()
    # Save a vector (EPS) and a raster (PNG) copy of the same figure.
    fig.savefig("./proc/velocity_parsed_power/bar_plots/{}_{}_velocity_parsed_lfp_power_bar.eps".format(band, brain_region))
    fig.savefig("./proc/velocity_parsed_power/bar_plots/{}_{}_velocity_parsed_lfp_power_bar.png".format(band, brain_region))


In [None]:


# Create the figure directory up front so savefig cannot fail on a fresh checkout.
os.makedirs("./proc/velocity_parsed_power/box_plots", exist_ok=True)
# Resolve the outcome order once so data, tick labels and colours cannot get out of step.
trial_outcomes = LFP_TRACES_DF["trial_outcome"].unique()

# One box plot per band-power column, with one box per trial outcome.
for column in band_power_window_averaged_velocity_parsed_columns:
    fig, ax = plt.subplots(figsize=(12, 12))

    # Column names start with "<region>_<band>_".
    brain_region = column.split("_")[0]
    band = column.split("_")[1]
    ax.set_title("Power of {} {} Band".format(brain_region, band.capitalize()), fontsize=40)
    ax.set_ylabel("Power (a.u.)", fontsize=40)

    all_outcome_data = [LFP_TRACES_DF[LFP_TRACES_DF["trial_outcome"] == trial_outcome][column] for trial_outcome in trial_outcomes]

    # NOTE(review): newer Matplotlib releases rename `labels` to `tick_labels`;
    # switch once the minimum supported Matplotlib version is confirmed.
    bplot = ax.boxplot(all_outcome_data,
                       vert=True,  # vertical box alignment
                       patch_artist=True,  # fill with color
                       labels=trial_outcomes)  # will be used to label x-ticks

    # Fill every box with the colour of its outcome.
    for patch, trial_outcome in zip(bplot['boxes'], trial_outcomes):
        patch.set_facecolor(OUTCOME_TO_COLOR[trial_outcome])

    plt.xticks(fontsize=30)
    plt.yticks(fontsize=40)
    fig.tight_layout()
    # Save a vector (EPS) and a raster (PNG) copy of the same figure.
    fig.savefig("./proc/velocity_parsed_power/box_plots/{}_{}_velocity_parsed_lfp_box_plot.eps".format(band, brain_region))
    fig.savefig("./proc/velocity_parsed_power/box_plots/{}_{}_velocity_parsed_lfp_box_plot.png".format(band, brain_region))




# OLD CODE BELOW Exporting Velocity Parsed dataframe for plotting

In [None]:
# Raising here stops a "Run All" before the cells under the "OLD CODE BELOW" heading.
# NOTE(review): presumably a deliberate guard against running the legacy export code - confirm.
raise ValueError()

In [None]:
# Remove every column whose name mentions "lfp_trace".
lfp_trace_columns = [col for col in LFP_TRACES_DF if "lfp_trace" in col]
LFP_TRACES_DF = LFP_TRACES_DF.drop(columns=lfp_trace_columns, errors="ignore")

In [None]:
LFP_TRACES_DF["mPFC_trial_power_window-averaged-velocity-parsed"].iloc[0].shape

In [None]:
for col in LFP_TRACES_DF:
    print(col)

In [None]:
LFP_TRACES_DF.to_pickle("./proc/rce_sleap_and_power.pkl")

In [None]:
LFP_TRACES_DF["recording_file"].unique()

In [None]:
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF = VELOCITY_PARSED_LFP_TRACES_DF[VELOCITY_PARSED_LFP_TRACES_DF["velocity_bin"] == 0].copy()

In [None]:
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF = EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.drop(columns=[col for col in EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.columns if "baseline" in col], errors="ignore")

In [None]:
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF = EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.drop(columns=[col for col in EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.columns if "trace" in col], errors="ignore")

In [None]:
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF = pd.melt(EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF, id_vars=EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.columns[:19], value_vars=[col for col in EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.columns if "power_window-averaged-velocity-parsed" in col], value_name="power", var_name="brain_region")




In [None]:
# Read the second name token while "brain_region" still holds the full melted column
# name. Doing this after the column is reduced to its first token raises IndexError,
# because the shortened name no longer has a second "_"-separated token.
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF["trial_or_baseline"] = EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF["brain_region"].apply(lambda x:x.split("_")[1])
# Now reduce "brain_region" to the first token of the source column name.
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF["brain_region"] = EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF["brain_region"].apply(lambda x:x.split("_")[0])

In [None]:
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF = EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.dropna(subset=["power"])

In [None]:
for band, (min_freq, max_freq) in BAND_TO_FREQ.items():
    EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF["mean_{}_power".format(band)] = EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF["power"].apply(lambda x: np.nanmean(x[min_freq:max_freq]))

In [None]:
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.to_pickle("./proc/rce_velocity_parsed_power_spectra_region_and_baselinetrial_rows.pkl")

In [None]:
EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF

In [None]:
for col in EXPORTED_VELOCITY_PARSED_LFP_TRACES_DF.columns[:19]:
    print(col)

# Calculating the velocity parsed average power for each frequency band

In [None]:
BAND_TO_FREQ

In [None]:
# Velocity-parsed power-spectrum columns. The per-band columns
# ("..._band-power_window-averaged-velocity-parsed") contain the same substring but
# hold one averaged value per row, so they are excluded here: slicing them by
# frequency in the cells below would fail.
velocity_parsed_power_window_averaged_columns = [
    col for col in LFP_TRACES_DF.columns
    if "power_window-averaged-velocity-parsed" in col and "band-power" not in col
]

In [None]:
velocity_parsed_power_window_averaged_columns

- Removing all trials that have a high velocity throughout

In [None]:
# Keep the rows whose trial velocity mask selected at least one window.
has_selected_window = LFP_TRACES_DF["trial_velocity_binned-mask"].map(len) >= 1
LFP_TRACES_DF = LFP_TRACES_DF[has_selected_window]

In [None]:
# For every velocity-parsed power column and band, store the band's slice of the
# spectrum ("..._all_power") and the mean of that slice ("..._mean_power").
for col in velocity_parsed_power_window_averaged_columns:
    # Output columns are prefixed with the first two "_" tokens of the source column.
    region_base_name = "_".join(col.split("_")[:2])
    print(region_base_name)
    for band, (min_freq, max_freq) in BAND_TO_FREQ.items():
        print(band)
        all_power_column = "{}_{}_all_power".format(region_base_name, band)
        mean_power_column = "{}_{}_mean_power".format(region_base_name, band)
        LFP_TRACES_DF[all_power_column] = LFP_TRACES_DF[col].apply(lambda x: x[min_freq:max_freq])
        LFP_TRACES_DF[mean_power_column] = LFP_TRACES_DF[all_power_column].apply(lambda x: x.mean())

In [None]:
# MD_trial_mean_gamma_power
mean_power_col = [col for col in LFP_TRACES_DF.columns if "mean_power" in col]

In [None]:
mean_power_df = pd.DataFrame(LFP_TRACES_DF.groupby([GROUPINGS])[mean_power_col].mean()).reset_index()

In [None]:
mean_power_df["color"] = mean_power_df[GROUPINGS].map(OUTCOME_TO_COLOR)

In [None]:
sem_power_df = pd.DataFrame(LFP_TRACES_DF.groupby([GROUPINGS])[mean_power_col].sem()).reset_index()

In [None]:
mean_power_df

In [None]:
OUTCOME_COMPARISONS

In [None]:
# Two-sided Mann-Whitney U test of every trial band-mean column for each configured
# pair of groups; one record per (column, comparison).
rank_sum_dict = []
for col in mean_power_col:
    # Column names read "<region>_<baseline|trial>_<band>_...".
    name_tokens = col.split("_")
    region = name_tokens[0]
    baseline_or_trial = name_tokens[1]
    band = name_tokens[2]
    if baseline_or_trial == "trial":
        for key, (first_outcome, second_outcome) in OUTCOME_COMPARISONS.items():
            first_df = LFP_TRACES_DF[LFP_TRACES_DF[GROUPINGS] == first_outcome]
            second_df = LFP_TRACES_DF[LFP_TRACES_DF[GROUPINGS] == second_outcome]
            statistic, p_value = mannwhitneyu(
                first_df[col],
                second_df[col],
                alternative='two-sided',
            )
            comparison_record = {
                "region": region,
                "band": band,
                "comparison": key,
                "mannwhitneyu_statistic": statistic,
                "mannwhitneyu_pvalue": p_value,
                "all_outcomes": {first_outcome, second_outcome},
            }
            rank_sum_dict.append(comparison_record)
rank_sum_df = pd.DataFrame(rank_sum_dict)

In [None]:
rank_sum_df.head()

In [None]:
# NOTE(review): `current_df` is not assigned in any nearby cell; unless an earlier
# cell defines it, this raises NameError on a fresh kernel - confirm or remove.
current_df

In [None]:
round(mean_power_df[[col for col in mean_power_df.columns if "theta" in col]].to_numpy().max() * 1.25, 3)

In [None]:
# Work on a copy, then remove every column whose name contains one of these
# substrings, one substring at a time and in this order.
LFP_TRACES_DF = LFP_TRACES_DF.copy()
column_substrings_to_drop = (
    "multitaper",
    "connectivity",
    "frequencies",
    "spike_interface",
    "power_window-averaged-velocity-parsed",
    "all-window_power",
    "all_power",
    "window-averaged_power",
)
for column_substring in column_substrings_to_drop:
    matching_columns = [col for col in LFP_TRACES_DF if column_substring in col]
    LFP_TRACES_DF = LFP_TRACES_DF.drop(columns=matching_columns, errors="ignore")

