# Notebook 1: Extract Z-scored LFP

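Extracts the local field potential (LFP) from SpikeGadgets `.rec` recordings (band-pass filter, notch filter, resample to the LFP sampling rate, z-score) and attaches baseline and trial LFP traces for each brain region to every labeled trial. The trial metadata and the traces are saved to the output directory.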

In [1]:
# Imports of all used packages and libraries
import os
import glob
import numpy as np
import pandas as pd

In [2]:
import spikeinterface.extractors as se
import spikeinterface.preprocessing as sp

## Inputs & Data

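- `channel_mapping.xlsx`: channel mapping spreadsheet with one row per subject (`Subject` column) and one `spike_interface_*` column per brain region.
- `rce_tone_timestamp.xlsx`: one row per trial, with the recording name, timestamp index, video frame, subject info and trial label (`condition`).
- `*.rec` session directories: the SpikeGadgets recordings that the LFP is extracted from.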

In [3]:
# Inputs and Required data loading
# input variable names are in all caps snake case
# Whenever an input changes or is used for processing
# the variables are all lower in snake case
OUTPUT_DIR = r"./proc/" # where data is saved should always be shown in the inputs
# Create the output folder up front; exist_ok=True makes re-running this cell safe
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [4]:
# Channel mapping spreadsheet; merged onto the trials through its "Subject" column later in the notebook
CHANNEL_MAPPING_DF = pd.read_excel("../../data/channel_mapping.xlsx")
# Trial timestamp spreadsheet; its first column is used as the DataFrame index
TONE_TIMESTAMP_DF = pd.read_excel("../../data/rce_tone_timestamp.xlsx", index_col=0)

In [5]:
# Sampling rate of the raw recording; divided by LFP_SAMPLING_RATE to convert
# raw sample indices into LFP sample indices (presumably Hz - confirm)
EPHYS_SAMPLING_RATE = 20000
# Rate the recording is resampled to (passed as resample_rate)
LFP_SAMPLING_RATE = 1000
# Length of the window taken before (baseline) and after (trial) each trial timestamp;
# multiplied by a rate to get a sample/frame count (presumably seconds - confirm)
TRIAL_DURATION = 10
# Video frame rate used to build the video frame ranges (presumably frames per second - confirm)
FRAME_RATE = 22
# stream_id values passed to se.read_spikegadgets
ECU_STREAM_ID = "ECU"
TRODES_STREAM_ID = "trodes"
# Band-pass cutoffs passed to sp.bandpass_filter
LFP_FREQ_MIN = 0.5
LFP_FREQ_MAX = 300
# Frequency passed to sp.notch_filter
ELECTRIC_NOISE_FREQ = 60
# Glob pattern used to find the recordings inside each session directory
RECORDING_EXTENTION = "*.rec"

In [6]:
# Hand-picked list of session directories, de-duplicated through set().
# NOTE(review): the following cell reassigns ALL_SESSION_DIR from a glob, so this list
# is not what the extraction loop iterates over - confirm whether this cell is still needed.
ALL_SESSION_DIR = list(set(['/scratch/back_up/reward_competition_extention/data/omission/2023_06_17/20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.rec',
'/scratch/back_up/reward_competition_extention/data/omission/2023_06_18/20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1.rec',
'/scratch/back_up/reward_competition_extention/data/omission/2023_06_19/20230619_115321_standard_comp_to_omission_D3_subj_1-2_and_1-4.rec',
'/scratch/back_up/reward_competition_extention/data/omission/2023_06_20/20230620_114347_standard_comp_to_omission_D4_subj_1-2_and_1-1.rec',
'/scratch/back_up/reward_competition_extention/data/omission/2023_06_21/20230621_111240_standard_comp_to_omission_D5_subj_1-4_and_1-2.rec'
                   ]))

In [7]:
# Collect every "*.rec" session directory under the omission data folder;
# this replaces any value ALL_SESSION_DIR had before this cell ran.
# NOTE(review): glob.glob returns paths in arbitrary order and the absolute path is machine-specific.
ALL_SESSION_DIR = glob.glob("/scratch/back_up/reward_competition_extention/data/omission/*/*.rec")

In [8]:
ALL_SESSION_DIR

['/scratch/back_up/reward_competition_extention/data/omission/2023_12_14/20221214_125409_om_and_comp_6_1_and_6_3.rec',
 '/scratch/back_up/reward_competition_extention/data/omission/2023_12_02/20221202_134600_omission_and_competition_subject_6_1_and_6_2.rec',
 '/scratch/back_up/reward_competition_extention/data/omission/2023_12_15/20221215_145401_comp_amd_om_6_1_and_6_3.rec',
 '/scratch/back_up/reward_competition_extention/data/omission/2023_06_17/20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2.rec',
 '/scratch/back_up/reward_competition_extention/data/omission/2023_06_17/20230617_115641_standard_comp_to_omission_D1_subj_2-2_and_2-4.rec',
 '/scratch/back_up/reward_competition_extention/data/omission/2023_06_21/20230621_111240_standard_comp_to_omission_D5_subj_1-4_and_1-2.rec',
 '/scratch/back_up/reward_competition_extention/data/omission/2023_06_20/20230620_114347_standard_comp_to_omission_D4_subj_1-2_and_1-1.rec',
 '/scratch/back_up/reward_competition_extention/data/omiss

## Outputs

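The notebook writes three data files to the output directory (`./proc/`): `trial_metadata.csv` and `trial_metadata.pkl` (one row per trial with the channel mapping, without LFP traces) and `full_baseline_and_trial_lfp_traces.pkl` (the same rows plus the baseline and trial LFP trace for each brain region).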

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

# Functions

In [9]:
def compute_sorted_index(group, value_column='Value', index_column='SortedIndex'):
    """
    Computes the index of each row's value within its sorted group.

    Every distinct value in `value_column` is assigned its 0-based position among
    the sorted distinct values of the group, so rows with equal values share an index.

    Parameters:
    - group (pd.DataFrame): A group of data.
    - value_column (str): Name of the column containing the values to be sorted.
    - index_column (str): Name of the new column that will contain the indices.

    Returns:
    - pd.DataFrame: A copy of the group with an additional column containing the
      indices. The frame passed in is left unmodified.
    """
    values = group[value_column].tolist()
    # One dictionary lookup per row instead of a list.index() scan per row
    position_of = {value: position for position, value in enumerate(sorted(set(values)))}
    # Work on a copy: mutating the chunk handed over by groupby.apply is not supported by pandas
    indexed_group = group.copy()
    indexed_group[index_column] = [position_of[value] for value in values]
    return indexed_group

## Processing

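The labeled trials are cleaned and annotated (subjects, trial outcome, competition closeness, timestamp ranges), the LFP is extracted and preprocessed for every recording, and the baseline and trial LFP traces of each brain region are attached to every trial.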

In [10]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names




# Reformatting Dataframe

- Dropping all rows that have not been labeled

In [11]:
# Keep only the trials that have a label in "condition", then renumber the rows from 0
all_trials_df = (
    TONE_TIMESTAMP_DF
    .dropna(subset=["condition"])
    .reset_index(drop=True)
)

In [12]:
sorted(all_trials_df["recording_dir"].unique())

['20221202_134600_omission_and_competition_subject_6_1_and_6_2',
 '20221203_154800_omission_and_competition_subject_6_4_and_6_1',
 '20221214_125409_om_and_comp_6_1_and_6_3',
 '20221215_145401_comp_amd_om_6_1_and_6_3',
 '20230612_101430_standard_comp_to_training_D1_subj_1-4_and_1-3',
 '20230617_115521_standard_comp_to_omission_D1_subj_1-1_and_1-2',
 '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1',
 '20230619_115321_standard_comp_to_omission_D3_subj_1-2_and_1-4',
 '20230620_114347_standard_comp_to_omission_D4_subj_1-2_and_1-1',
 '20230621_111240_standard_comp_to_omission_D5_subj_1-4_and_1-2']

- Making the video frame number usable

In [13]:
# Cast the frame numbers to int so they can be used for the integer frame ranges built later
all_trials_df["video_frame"] = all_trials_df["video_frame"].astype(int)

- Getting the name of the video so that we can sync it up with the ephys recording

In [14]:
# Remove the exact ".videoTimeStamps.cameraHWSync" suffix from the file name.
# str.strip() would treat the argument as a set of characters and could also
# eat leading/trailing characters that belong to the video name itself.
all_trials_df["video_name"] = all_trials_df["video_file"].str.replace(
    r"\.videoTimeStamps\.cameraHWSync$", "", regex=True
)

- Getting all subject IDs for a given recording

In [15]:
# using different id extractions for different file formats
all_trials_df["all_subjects"] = all_trials_df["recording_dir"].apply(lambda x: x if "2023" in x else "subj" + "_".join(x.split("_")[-5:]))
all_trials_df["all_subjects"] = all_trials_df["all_subjects"].apply(lambda x: tuple(sorted([num.strip("_").replace("_",".") for num in x.replace("-", "_").split("subj")[-1].strip("_").split("and")])))

In [16]:
all_trials_df["all_subjects"].unique()

array([('6.1', '6.2'), ('6.1', '6.4'), ('6.1', '6.3'), ('1.3', '1.4'),
       ('1.1', '1.2'), ('1.1', '1.4'), ('1.2', '1.4')], dtype=object)

In [17]:
# Subject id of the row: the first two "_"/"-"-separated tokens of subject_info joined with "."
all_trials_df["current_subject"] = (
    all_trials_df["subject_info"]
    .apply(lambda info: ".".join(info.replace("-", "_").split("_")[:2]))
    .astype(str)
)

In [18]:
all_trials_df["current_subject"].unique()

array(['6.1', '1.3', '1.4', '1.1', '1.2'], dtype=object)

- Converting the trial label to win or lose based on who won the trial

In [19]:
def _trial_outcome(row):
    """
    Translate the trial label of one row into an outcome for the current subject.

    Returns "win" when the label names the current subject, "lose" when it names
    another subject of the same recording, and the unchanged label otherwise
    (e.g. "rewarded" / "omission").
    """
    # Strip once and use the same cleaned label for both comparisons, so a label
    # with stray whitespace is not recognised as a win but missed as a loss
    label = str(row["condition"]).strip()
    if label == str(row["current_subject"]):
        return "win"
    if label in row["all_subjects"]:
        return "lose"
    return row["condition"]


all_trials_df["trial_outcome"] = all_trials_df.apply(_trial_outcome, axis=1)

In [20]:
all_trials_df["trial_outcome"].unique()

array(['rewarded', 'omission', 'win', 'lose'], dtype=object)

- Adding the competition closeness as a column

In [21]:
# Collapse the raw annotations into two classes:
# any label mentioning "only" -> "non_comp", any other string -> "comp", non-strings (NaN) -> NaN
competition_closeness_map = {
    label: (
        "non_comp"
        if "only" in str(label).lower()
        else ("comp" if type(label) is str else np.nan)
    )
    for label in all_trials_df["competition_closeness"].unique()
}

In [22]:
competition_closeness_map

{nan: nan,
 'Subj 1 Only': 'non_comp',
 'Subj 2 blocking Subj 1': 'comp',
 'Subj 1 then Subj 2': 'comp',
 'Subj 1 blocking Subj 2': 'comp',
 'Subj 2 Only': 'non_comp',
 'Subj 2 then Subj 1': 'comp',
 'Close Call': 'comp'}

In [23]:
# Replace the raw annotations with the values of competition_closeness_map.
# NOTE(review): the column is overwritten in place, so re-running this cell maps the
# already-mapped labels, which are not keys of the dictionary, and yields NaN.
all_trials_df["competition_closeness"] = all_trials_df["competition_closeness"].map(competition_closeness_map)

In [24]:
# Label each trial as "<outcome>_<closeness>", or as the bare outcome when no closeness is available.
# The missing case is tested explicitly: str.strip("nan") removes the characters 'n' and 'a'
# from both ends, so it would also damage labels that start or end with those letters.
all_trials_df["competition_closeness"] = all_trials_df.apply(
    lambda x: str(x["trial_outcome"])
    if pd.isna(x["competition_closeness"])
    else "_".join([str(x["trial_outcome"]), str(x["competition_closeness"])]),
    axis=1,
)

In [25]:
all_trials_df["competition_closeness"].unique()

array(['rewarded', 'omission', 'win_non_comp', 'win_comp',
       'lose_non_comp', 'lose_comp'], dtype=object)

- Adding the LFP index

In [26]:
# Convert raw-recording sample indices into sample indices at the LFP rate:
# floor-divide by the ratio of the two sampling rates, then cast to int
all_trials_df["lfp_index"] = (
    all_trials_df["time_stamp_index"]
    .floordiv(EPHYS_SAMPLING_RATE / LFP_SAMPLING_RATE)
    .astype(int)
)

In [27]:
# Store the timestamps as integers; "time" is later used to order the trials of each recording
all_trials_df["time"] = all_trials_df["time"].astype(int)

In [28]:
# Store the sample indices as integers so the ephys ranges built from them are integer tuples
all_trials_df["time_stamp_index"] = all_trials_df["time_stamp_index"].astype(int)

- Removing unnecessary columns

In [29]:
# Drop the columns that are no longer needed; errors="ignore" skips any that are absent
all_trials_df = all_trials_df.drop(
    columns=["state", "din", "condition", "Unnamed: 13"],
    errors="ignore",
)

In [30]:
all_trials_df.head()

Unnamed: 0,time,recording_dir,recording_file,time_stamp_index,video_file,video_frame,video_number,subject_info,competition_closeness,video_name,all_subjects,current_subject,trial_outcome,lfp_index
0,6310663,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,1390826,20221202_134600_omission_and_competition_subje...,1734,1.0,6_1_top_2_base_3,rewarded,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,rewarded,69541
1,7910662,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,2990825,20221202_134600_omission_and_competition_subje...,3728,1.0,6_1_top_2_base_3,rewarded,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,rewarded,149541
2,9710660,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,4790823,20221202_134600_omission_and_competition_subje...,5972,1.0,6_1_top_2_base_3,rewarded,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,rewarded,239541
3,11310658,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,6390821,20221202_134600_omission_and_competition_subje...,7966,1.0,6_1_top_2_base_3,omission,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,omission,319541
4,12810657,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,7890820,20221202_134600_omission_and_competition_subje...,9836,1.0,6_1_top_2_base_3,rewarded,20221202_134600_omission_and_competition_subje...,"(6.1, 6.2)",6.1,rewarded,394541


- Making columns of the different timestamps

In [31]:
# (start, end) LFP sample indices of the window that ends at the trial timestamp.
# NOTE(review): start becomes negative when lfp_index is smaller than TRIAL_DURATION * LFP_SAMPLING_RATE - confirm this cannot happen.
all_trials_df["baseline_lfp_timestamp_range"] = all_trials_df["lfp_index"].apply(lambda x: (x - TRIAL_DURATION * LFP_SAMPLING_RATE, x))

In [32]:
# (start, end) LFP sample indices of the window that starts at the trial timestamp
all_trials_df["trial_lfp_timestamp_range"] = all_trials_df["lfp_index"].apply(lambda x: (x, x + TRIAL_DURATION * LFP_SAMPLING_RATE))

In [33]:
# (start, end) raw-recording sample indices of the window that ends at the trial timestamp
all_trials_df["baseline_ephys_timestamp_range"] = all_trials_df["time_stamp_index"].apply(lambda x: (x - TRIAL_DURATION * EPHYS_SAMPLING_RATE, x))

In [34]:
# (start, end) raw-recording sample indices of the window that starts at the trial timestamp
all_trials_df["trial_ephys_timestamp_range"] = all_trials_df["time_stamp_index"].apply(lambda x: (x, x + TRIAL_DURATION * EPHYS_SAMPLING_RATE))

In [35]:
# (start, end) video frame numbers of the window that ends at the trial's video frame
all_trials_df["baseline_videoframe_range"] = all_trials_df["video_frame"].apply(lambda x: (x - TRIAL_DURATION * FRAME_RATE, x))

In [36]:
# (start, end) video frame numbers of the window that starts at the trial's video frame
all_trials_df["trial_videoframe_range"] = all_trials_df["video_frame"].apply(lambda x: (x, x + TRIAL_DURATION * FRAME_RATE))

# Extracting the LFP

In [37]:
# Maps each recording's base name (file name without extension) to its preprocessed LFP recording object
recording_name_to_all_ch_lfp = {}
# Going through all the recording sessions
for session_dir in ALL_SESSION_DIR:
    # Going through all the recordings in each session
    for recording_path in glob.glob(os.path.join(session_dir, RECORDING_EXTENTION)):
        # Computed outside the try block so the error message can always name the recording
        recording_basename = os.path.splitext(os.path.basename(recording_path))[0]
        try:
            # Reading the ECU stream only checks that the recording has an ECU component:
            # the result is discarded, and if the read fails the recording is skipped
            se.read_spikegadgets(recording_path, stream_id=ECU_STREAM_ID)
            current_recording = se.read_spikegadgets(recording_path, stream_id=TRODES_STREAM_ID)
            print(recording_basename)
            # Preprocessing the LFP: band-pass, notch out the electrical noise, resample, z-score
            current_recording = sp.bandpass_filter(current_recording, freq_min=LFP_FREQ_MIN, freq_max=LFP_FREQ_MAX)
            current_recording = sp.notch_filter(current_recording, freq=ELECTRIC_NOISE_FREQ)
            current_recording = sp.resample(current_recording, resample_rate=LFP_SAMPLING_RATE)
            current_recording = sp.zscore(current_recording)
            recording_name_to_all_ch_lfp[recording_basename] = current_recording
        except Exception as error:
            # Best effort: report which recording failed and continue with the remaining ones
            print("An exception occurred while processing {}: {}".format(recording_basename, error))




20221214_125409_om_and_comp_6_1_top_1_base_2_vs_6_3
An exception occurred: stream_id trodes is not in ['ECU']
20221202_134600_omission_and_competition_subject_6_1_top_2_base_3_merged


KeyboardInterrupt: 

- Filtering for all trials that we got the LFP for

In [None]:
# Keep only the trials whose recording has an extracted LFP, then renumber the rows
all_trials_df = (
    all_trials_df
    .loc[all_trials_df["recording_file"].isin(list(recording_name_to_all_ch_lfp))]
    .reset_index(drop=True)
)

In [None]:
all_trials_df.head()

- Adding trial numbers based on timestamp ordering for each recording

In [None]:
# Number the trials of each recording by the rank of their "time" value (0-based)
all_trials_df = (
    all_trials_df
    .groupby("recording_file")
    .apply(compute_sorted_index, value_column="time", index_column="trial_number")
    .reset_index(drop=True)
)

In [None]:
all_trials_df["trial_number"].unique()

## Adding the LFP trace information

In [None]:
CHANNEL_MAPPING_DF

- Adding the brain-region-to-channel mapping for each subject

In [None]:
# Cast the subject ids to str so they can be matched against the string "current_subject" column in the merge.
# NOTE(review): this modifies the input frame CHANNEL_MAPPING_DF in place.
CHANNEL_MAPPING_DF["Subject"] = CHANNEL_MAPPING_DF["Subject"].astype(str)

In [None]:
# Attach the channel mapping of the current subject to every trial;
# the left join keeps trials even when no mapping row matches
channel_map_and_all_trials_df = pd.merge(
    all_trials_df,
    CHANNEL_MAPPING_DF,
    how="left",
    left_on="current_subject",
    right_on="Subject",
)

In [None]:
# Remove every column whose name contains "eib"
channel_map_and_all_trials_df = channel_map_and_all_trials_df.drop(
    columns=[
        name
        for name in channel_map_and_all_trials_df.columns
        if "eib" in name
    ],
    errors="ignore",
)

In [None]:
# "Subject" was only the merge key and duplicates "current_subject", so it is dropped
channel_map_and_all_trials_df = channel_map_and_all_trials_df.drop(columns=["Subject"], errors="ignore")

In [None]:
channel_map_and_all_trials_df.head()

In [None]:
# Save the trial metadata under OUTPUT_DIR (declared in the inputs) rather than a repeated hard-coded folder
channel_map_and_all_trials_df.to_csv(os.path.join(OUTPUT_DIR, "trial_metadata.csv"))

In [None]:
# Pickle copy of the trial metadata, written under OUTPUT_DIR (declared in the inputs)
channel_map_and_all_trials_df.to_pickle(os.path.join(OUTPUT_DIR, "trial_metadata.pkl"))

In [None]:
channel_map_and_all_trials_df.columns

- Linking up all LFP calculations with all the trials

In [None]:
# Give every trial row a reference to the preprocessed recording object of its recording file
channel_map_and_all_trials_df["all_ch_lfp"] = channel_map_and_all_trials_df["recording_file"].map(recording_name_to_all_ch_lfp)

- Creating a new row for each brain region

In [None]:
# Channel-mapping columns that hold the channel id of a brain region
brain_region_col = [
    name
    for name in CHANNEL_MAPPING_DF.columns
    if "spike_interface" in name
]

In [None]:
# All columns that are not brain-region channel columns.
# NOTE(review): id_cols is not used anywhere in the visible part of the notebook - confirm whether it is still needed.
id_cols = [col for col in channel_map_and_all_trials_df.columns if col not in brain_region_col]

In [None]:
brain_region_col

In [None]:
# Turn the channel ids into strings without a decimal part (cast to int, then to str);
# these values are later passed as channel_ids to get_traces.
# NOTE(review): astype(int) raises on NaN, which the left merge produces for subjects without a mapping row - confirm all subjects are mapped.
for col in brain_region_col:
    channel_map_and_all_trials_df[col] = channel_map_and_all_trials_df[col].astype(int).astype(str)

In [None]:
channel_map_and_all_trials_df.columns

In [None]:
# For every brain region, add two columns holding the baseline and trial LFP trace of each trial
for col in brain_region_col:
    print(col)
    # Region label = column name without the "spike_interface" marker.
    # The exact substring is removed: str.strip("spike_interface") strips a character set and
    # could eat letters of the region name itself (e.g. a trailing "c").
    region_name = col.replace("spike_interface", "").strip("_")
    for window in ("baseline", "trial"):
        range_col = "{}_lfp_timestamp_range".format(window)
        # Single-channel trace between the start and end frame of the window, flattened to 1-D with .T[0].
        # NOTE(review): a baseline window of an early trial can start at a negative frame - confirm how get_traces handles it.
        channel_map_and_all_trials_df["{}_{}_lfp_trace".format(region_name, window)] = channel_map_and_all_trials_df.apply(
            lambda row: row["all_ch_lfp"].get_traces(
                channel_ids=[row[col]],
                start_frame=row[range_col][0],
                end_frame=row[range_col][1],
            ).T[0],
            axis=1,
        )


In [None]:
# Save the trials together with their LFP traces under OUTPUT_DIR (declared in the inputs)
channel_map_and_all_trials_df.to_pickle(os.path.join(OUTPUT_DIR, "full_baseline_and_trial_lfp_traces.pkl"))

In [None]:
channel_map_and_all_trials_df.head()