# Title of notebook

Brief 1-2 sentence description of notebook.

In [1]:
import glob
import re
import os

In [2]:
# Imports of all used packages and libraries
import numpy as np
import pandas as pd

## Inputs & Data

Explanation of each input and where it comes from.

In [3]:
# Inputs and Required data loading
# input varaible names are in all caps snake case
# Whenever an input changes or is used for processing 
# the vairables are all lower in snake case


TONE_TIMESTAMP_DF = pd.read_excel("../../rce_tone_timestamp.xlsx", index_col=0)
OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs

INPUT_DIR=glob.glob("/scratch/back_up/reward_competition_extention/proc/spike_sorting/*")

## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

In [4]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names

def calc_bmi(weight, height):
    """
    This is a function that calculates BMI.
    it uses height and weight...etc.
    Meghan plz show us your docsctring format here.
    """
    bmi = weight/(height**2)
    return(bmi)


In [81]:
def find_closest(target, reference_list):
    """
    Finds the closest number in a reference list to the given target number.

    Parameters:
    - target (float or int): The number for which we want to find the closest value.
    - reference_list (list of float or int): The list of numbers in which we want to search.

    Returns:
    - float or int: The closest number from the reference list to the target.
    """

    # Using the 'min' function with a custom key to determine the closest value by minimal absolute difference
    closest_value = min(reference_list, key=lambda x: abs(x - target))
    
    return closest_value

### Getting the subject IDs from the file name

In [5]:
all_trials_df = TONE_TIMESTAMP_DF.dropna(subset="condition").sort_values(by=["recording_file", "time_stamp_index"]).reset_index(drop=True)

In [6]:
all_trials_df["time"] = all_trials_df["time"].astype(int)
all_trials_df["time_stamp_index"] = all_trials_df["time_stamp_index"].astype(int)
all_trials_df["video_frame"] = all_trials_df["video_frame"].astype(int)

In [7]:
all_trials_df.head()

Unnamed: 0,time,state,recording_dir,recording_file,din,time_stamp_index,video_file,video_frame,video_number,subject_info,condition,competition_closeness,Unnamed: 13
0,6310663,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,1390826,20221202_134600_omission_and_competition_subje...,1734,1.0,6_1_top_2_base_3,rewarded,,
1,7910662,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,2990825,20221202_134600_omission_and_competition_subje...,3728,1.0,6_1_top_2_base_3,rewarded,,
2,9710660,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,4790823,20221202_134600_omission_and_competition_subje...,5972,1.0,6_1_top_2_base_3,rewarded,,
3,11310658,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,6390821,20221202_134600_omission_and_competition_subje...,7966,1.0,6_1_top_2_base_3,omission,,
4,12810657,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,7890820,20221202_134600_omission_and_competition_subje...,9836,1.0,6_1_top_2_base_3,rewarded,,


- Original timestamps are based on ephys recordings at 20kHz. The LFP will be at 1kHz, so we will need to divide all the timestamps by 20

In [8]:
all_trials_df["resampled_index"] = all_trials_df["time_stamp_index"] // 20

In [9]:
all_trials_df["recording_dir"].unique()

array(['20221202_134600_omission_and_competition_subject_6_1_and_6_2',
       '20221203_154800_omission_and_competition_subject_6_4_and_6_1',
       '20221214_125409_om_and_comp_6_1_and_6_3',
       '20221215_145401_comp_amd_om_6_1_and_6_3',
       '20230612_101430_standard_comp_to_training_D1_subj_1-4_and_1-3',
       '20230618_100636_standard_comp_to_omission_D2_subj_1-4_and_1-1',
       '20230619_115321_standard_comp_to_omission_D3_subj_1-2_and_1-4',
       '20230620_114347_standard_comp_to_omission_D4_subj_1-2_and_1-1',
       '20230621_111240_standard_comp_to_omission_D5_subj_1-4_and_1-2'],
      dtype=object)

- Getting a list of all the subjects through the recording name

In [10]:
all_trials_df["all_subjects"] = all_trials_df["recording_dir"].apply(lambda x: ["{}.{}".format(tup[0],tup[1]) for tup in re.findall(r'(\d+)-(\d+)', x.replace("_", "-"))[1:]])

In [11]:
all_trials_df["all_subjects"].head()

0    [6.1, 6.2]
1    [6.1, 6.2]
2    [6.1, 6.2]
3    [6.1, 6.2]
4    [6.1, 6.2]
Name: all_subjects, dtype: object

- Getting the current subject of the recording through the ending of the recording name file

In [12]:
all_trials_df["subject_info"].head()

0    6_1_top_2_base_3
1    6_1_top_2_base_3
2    6_1_top_2_base_3
3    6_1_top_2_base_3
4    6_1_top_2_base_3
Name: subject_info, dtype: object

In [13]:
all_trials_df["current_subject"] = all_trials_df["subject_info"].apply(lambda x: ".".join(x.replace("-","_").split("_")[:2]))

In [14]:
all_trials_df.head()

Unnamed: 0,time,state,recording_dir,recording_file,din,time_stamp_index,video_file,video_frame,video_number,subject_info,condition,competition_closeness,Unnamed: 13,resampled_index,all_subjects,current_subject
0,6310663,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,1390826,20221202_134600_omission_and_competition_subje...,1734,1.0,6_1_top_2_base_3,rewarded,,,69541,"[6.1, 6.2]",6.1
1,7910662,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,2990825,20221202_134600_omission_and_competition_subje...,3728,1.0,6_1_top_2_base_3,rewarded,,,149541,"[6.1, 6.2]",6.1
2,9710660,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,4790823,20221202_134600_omission_and_competition_subje...,5972,1.0,6_1_top_2_base_3,rewarded,,,239541,"[6.1, 6.2]",6.1
3,11310658,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,6390821,20221202_134600_omission_and_competition_subje...,7966,1.0,6_1_top_2_base_3,omission,,,319541,"[6.1, 6.2]",6.1
4,12810657,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,7890820,20221202_134600_omission_and_competition_subje...,9836,1.0,6_1_top_2_base_3,rewarded,,,394541,"[6.1, 6.2]",6.1


- Labeling the trial as a winner or loser if the winner matches the subject id or not

In [15]:
all_trials_df["trial_outcome"] = all_trials_df.apply(
    lambda x: "win" if str(x["condition"]).strip() == str(x["current_subject"]) 
             else ("lose" if str(x["condition"]) in x["all_subjects"] 
                   else x["condition"]), axis=1)

In [16]:
all_trials_df.head()

Unnamed: 0,time,state,recording_dir,recording_file,din,time_stamp_index,video_file,video_frame,video_number,subject_info,condition,competition_closeness,Unnamed: 13,resampled_index,all_subjects,current_subject,trial_outcome
0,6310663,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,1390826,20221202_134600_omission_and_competition_subje...,1734,1.0,6_1_top_2_base_3,rewarded,,,69541,"[6.1, 6.2]",6.1,rewarded
1,7910662,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,2990825,20221202_134600_omission_and_competition_subje...,3728,1.0,6_1_top_2_base_3,rewarded,,,149541,"[6.1, 6.2]",6.1,rewarded
2,9710660,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,4790823,20221202_134600_omission_and_competition_subje...,5972,1.0,6_1_top_2_base_3,rewarded,,,239541,"[6.1, 6.2]",6.1,rewarded
3,11310658,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,6390821,20221202_134600_omission_and_competition_subje...,7966,1.0,6_1_top_2_base_3,omission,,,319541,"[6.1, 6.2]",6.1,omission
4,12810657,1.0,20221202_134600_omission_and_competition_subje...,20221202_134600_omission_and_competition_subje...,dio_ECU_Din1,7890820,20221202_134600_omission_and_competition_subje...,9836,1.0,6_1_top_2_base_3,rewarded,,,394541,"[6.1, 6.2]",6.1,rewarded


# Reading in Phy

- Reading in a spreadsheet of all the unit classifications
    - They are divided up into good units, multi-units, and noise

In [17]:
recording_to_cluster_info = {}
for recording_dir in INPUT_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "cluster_info.tsv")
        recording_to_cluster_info[recording_basename] = pd.read_csv(file_path, sep="\t")
    except Exception as e:
        print(e)

[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20221125_152723_competition_subject_6_1_top_3_base_2_merged.rec/phy/cluster_info.tsv'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230616_111904_standard_comp_to_training_D4_subj_1-4_t4b3L_box1_merged.rec/phy/cluster_info.tsv'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230612_101430_standard_comp_to_training_D1_subj_1-4_t4b2L_box1_merged.rec/phy/cluster_info.tsv'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20221202_134600_omission_and_competition_subject_6_1_top_2_base_3_merged.rec/phy/cluster_info.tsv'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230622_110832_standard_comp_to_both_rewarded_D1_subj_1-1_t1b3L_box1_merged.rec/phy/cluster_info.tsv'
[

In [18]:
recording_to_cluster_info[list(recording_to_cluster_info.keys())[0]]

Unnamed: 0,cluster_id,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id
0,0,9.704487,0,0,0.0,8.615203,mua,29547,0,1
1,1,15.160955,0,0,0.0,2.351562,good,8065,0,2
2,2,10.994174,1,0,20.0,0.631263,good,2165,0,3
3,3,9.616855,1,0,20.0,5.323891,mua,18259,0,4
4,4,13.022879,2,0,40.0,9.569241,good,32819,0,5
5,5,9.269071,2,0,40.0,2.56937,mua,8812,0,6
6,7,8.4668,2,0,40.0,2.642555,good,9063,0,8
7,8,9.198713,3,0,60.0,0.217516,mua,746,0,9
8,9,12.508228,3,0,60.0,1.605419,good,5506,0,10
9,13,10.283003,10,0,200.0,1.223162,mua,4195,0,14


- Combining all the unit info dataframes and adding the recording name

In [19]:
recording_to_cluster_info_df = pd.concat(recording_to_cluster_info, names=['recording_name']).reset_index(level=1, drop=True).reset_index()


In [20]:
recording_to_cluster_info_df.head()

Unnamed: 0,recording_name,cluster_id,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id
0,20230618_100636_standard_comp_to_omission_D2_s...,0,9.704487,0,0.0,0.0,8.615203,mua,29547,0,1.0
1,20230618_100636_standard_comp_to_omission_D2_s...,1,15.160955,0,0.0,0.0,2.351562,good,8065,0,2.0
2,20230618_100636_standard_comp_to_omission_D2_s...,2,10.994174,1,0.0,20.0,0.631263,good,2165,0,3.0
3,20230618_100636_standard_comp_to_omission_D2_s...,3,9.616855,1,0.0,20.0,5.323891,mua,18259,0,4.0
4,20230618_100636_standard_comp_to_omission_D2_s...,4,13.022879,2,0.0,40.0,9.569241,good,32819,0,5.0


- Filtering for the good units

In [21]:
good_unit_cluster_info_df = recording_to_cluster_info_df[recording_to_cluster_info_df["group"] == "good"].reset_index(drop=True)

In [22]:
good_unit_cluster_info_df.head()

Unnamed: 0,recording_name,cluster_id,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id
0,20230618_100636_standard_comp_to_omission_D2_s...,1,15.160955,0,0.0,0.0,2.351562,good,8065,0,2.0
1,20230618_100636_standard_comp_to_omission_D2_s...,2,10.994174,1,0.0,20.0,0.631263,good,2165,0,3.0
2,20230618_100636_standard_comp_to_omission_D2_s...,4,13.022879,2,0.0,40.0,9.569241,good,32819,0,5.0
3,20230618_100636_standard_comp_to_omission_D2_s...,7,8.4668,2,0.0,40.0,2.642555,good,9063,0,8.0
4,20230618_100636_standard_comp_to_omission_D2_s...,9,12.508228,3,0.0,60.0,1.605419,good,5506,0,10.0


In [23]:
good_unit_cluster_info_df

Unnamed: 0,recording_name,cluster_id,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id
0,20230618_100636_standard_comp_to_omission_D2_s...,1,15.160955,0,0.0,0.0,2.351562,good,8065,0,2.0
1,20230618_100636_standard_comp_to_omission_D2_s...,2,10.994174,1,0.0,20.0,0.631263,good,2165,0,3.0
2,20230618_100636_standard_comp_to_omission_D2_s...,4,13.022879,2,0.0,40.0,9.569241,good,32819,0,5.0
3,20230618_100636_standard_comp_to_omission_D2_s...,7,8.466800,2,0.0,40.0,2.642555,good,9063,0,8.0
4,20230618_100636_standard_comp_to_omission_D2_s...,9,12.508228,3,0.0,60.0,1.605419,good,5506,0,10.0
...,...,...,...,...,...,...,...,...,...,...,...
99,20230619_115321_standard_comp_to_omission_D3_s...,73,12.354398,2,0.0,40.0,2.966960,good,6110,0,10.0
100,20230619_115321_standard_comp_to_omission_D3_s...,77,7.057085,7,0.0,140.0,0.553574,good,1140,0,21.0
101,20230619_115321_standard_comp_to_omission_D3_s...,87,11.722415,10,0.0,200.0,4.377602,good,9015,0,28.0
102,20230619_115321_standard_comp_to_omission_D3_s...,89,13.939378,13,0.0,260.0,1.063930,good,2191,0,32.0


In [24]:
recording_to_good_unit_ids = good_unit_cluster_info_df.groupby('recording_name')['cluster_id'].apply(list).to_dict()


- A list of all the unit IDs that each spike came from in order
    - First item is first spike, second item is second spike, etc.

In [25]:
recording_to_spike_clusters = {}
for recording_dir in INPUT_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "spike_clusters.npy")
        recording_to_spike_clusters[recording_basename] = np.load(file_path)
    except Exception as e:
        print(e)

[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/old/phy/spike_clusters.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230623_114932_standard_comp_to_both_rewarded_D2_subj_1-1_t1b2L_box1_merged.rec/phy/spike_clusters.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230623_114932_standard_comp_to_both_rewarded_D2_subj_1-4_t13b3L_box1_merged.rec/phy/spike_clusters.npy'


In [26]:
recording_to_spike_clusters[list(recording_to_spike_clusters.keys())[0]]

array([[15],
       [ 0],
       [25],
       ...,
       [ 1],
       [21],
       [22]])

- The times that all the spikes happened

In [27]:
recording_to_spike_times = {}
for recording_dir in INPUT_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "spike_times.npy")
        recording_to_spike_times[recording_basename] = np.load(file_path)
    except Exception as e:
        print(e)

[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/old/phy/spike_times.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230623_114932_standard_comp_to_both_rewarded_D2_subj_1-1_t1b2L_box1_merged.rec/phy/spike_times.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230623_114932_standard_comp_to_both_rewarded_D2_subj_1-4_t13b3L_box1_merged.rec/phy/spike_times.npy'


In [28]:
recording_to_spike_times[list(recording_to_spike_times.keys())[0]]

array([[     196],
       [     344],
       [     451],
       ...,
       [27602558],
       [27602668],
       [27602695]])

In [29]:
recording_to_spike_times[list(recording_to_spike_times.keys())[0]].shape

(234687, 1)

### Combining everything into a dataframe

In [30]:
recording_to_spike_clusters = {}
for recording_dir in INPUT_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "spike_clusters.npy")
        recording_to_spike_clusters[recording_basename] = np.load(file_path)
    except Exception as e:
        print(e)

recording_to_spike_times = {}
for recording_dir in INPUT_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "spike_times.npy")
        recording_to_spike_times[recording_basename] = np.load(file_path)
    except Exception as e:
        print(e)

recording_to_cluster_info = {}
for recording_dir in INPUT_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        file_path = os.path.join(recording_dir, "phy", "cluster_info.tsv")
        recording_to_cluster_info[recording_basename] = pd.read_csv(file_path, sep="\t")
    except Exception as e:
        print(e)

[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/old/phy/spike_clusters.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230623_114932_standard_comp_to_both_rewarded_D2_subj_1-1_t1b2L_box1_merged.rec/phy/spike_clusters.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230623_114932_standard_comp_to_both_rewarded_D2_subj_1-4_t13b3L_box1_merged.rec/phy/spike_clusters.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/old/phy/spike_times.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230623_114932_standard_comp_to_both_rewarded_D2_subj_1-1_t1b2L_box1_merged.rec/phy/spike_times.npy'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230623_114932_

In [31]:
recording_to_spike_df = {}
for recording_dir in INPUT_DIR:
    try:
        recording_basename = os.path.basename(recording_dir).strip(".rec")
        cluster_info_path = os.path.join(recording_dir, "phy", "cluster_info.tsv")
        cluster_info_df = pd.read_csv(cluster_info_path, sep="\t")

        spike_clusters_path = os.path.join(recording_dir, "phy", "spike_clusters.npy")
        spike_clusters = np.load(spike_clusters_path)
        
        spike_times_path = os.path.join(recording_dir, "phy", "spike_times.npy")
        spike_times = np.load(spike_times_path)

        spike_df = pd.DataFrame({'spike_clusters': spike_clusters, 'spike_times': spike_times.T[0]})

        merged_df = spike_df.merge(cluster_info_df, left_on='spike_clusters', right_on='cluster_id', how="left")
        merged_df = merged_df[merged_df["group"] == "good"].reset_index(drop = True)
        merged_df["recording_name"] = recording_basename

        if not merged_df.empty:
            recording_to_spike_df[recording_basename] = merged_df
       
    except Exception as e:
        print(e)

[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20221125_152723_competition_subject_6_1_top_3_base_2_merged.rec/phy/cluster_info.tsv'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230616_111904_standard_comp_to_training_D4_subj_1-4_t4b3L_box1_merged.rec/phy/cluster_info.tsv'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230612_101430_standard_comp_to_training_D1_subj_1-4_t4b2L_box1_merged.rec/phy/cluster_info.tsv'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20221202_134600_omission_and_competition_subject_6_1_top_2_base_3_merged.rec/phy/cluster_info.tsv'
[Errno 2] No such file or directory: '/scratch/back_up/reward_competition_extention/proc/spike_sorting/20230622_110832_standard_comp_to_both_rewarded_D1_subj_1-1_t1b3L_box1_merged.rec/phy/cluster_info.tsv'
[

In [32]:
spike_times

array([[     143],
       [     168],
       [     274],
       ...,
       [38957896],
       [38957942],
       [38957990]])

In [33]:
spike_clusters

array([76,  2, 54, ..., 62, 60, 62], dtype=int32)

In [34]:
spike_df

Unnamed: 0,spike_clusters,spike_times
0,76,143
1,2,168
2,54,274
3,49,553
4,45,652
...,...,...
379741,49,38957821
379742,9,38957868
379743,62,38957896
379744,60,38957942


In [35]:
spike_clusters

array([76,  2, 54, ..., 62, 60, 62], dtype=int32)

In [36]:
recording_to_spike_df[list(recording_to_spike_df.keys())[0]].head()

Unnamed: 0,spike_clusters,spike_times,cluster_id,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id,recording_name
0,7,937,7,8.4668,2,0,40.0,2.642555,good,9063,0,8,20230618_100636_standard_comp_to_omission_D2_s...
1,25,998,25,19.984566,24,0,480.0,2.39005,good,8197,0,27,20230618_100636_standard_comp_to_omission_D2_s...
2,38,1125,38,14.402934,6,0,120.0,7.804621,good,26767,0,11,20230618_100636_standard_comp_to_omission_D2_s...
3,25,1327,25,19.984566,24,0,480.0,2.39005,good,8197,0,27,20230618_100636_standard_comp_to_omission_D2_s...
4,38,1346,38,14.402934,6,0,120.0,7.804621,good,26767,0,11,20230618_100636_standard_comp_to_omission_D2_s...


In [37]:
recording_to_spike_df[list(recording_to_spike_df.keys())[0]].tail()

Unnamed: 0,spike_clusters,spike_times,cluster_id,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id,recording_name
117775,42,67096813,42,10.973118,6,0,120.0,3.327176,good,11411,0,12,20230618_100636_standard_comp_to_omission_D2_s...
117776,4,67097699,4,13.022879,2,0,40.0,9.569241,good,32819,0,5,20230618_100636_standard_comp_to_omission_D2_s...
117777,1,67098020,1,15.160955,0,0,0.0,2.351562,good,8065,0,2,20230618_100636_standard_comp_to_omission_D2_s...
117778,4,67098467,4,13.022879,2,0,40.0,9.569241,good,32819,0,5,20230618_100636_standard_comp_to_omission_D2_s...
117779,42,67098849,42,10.973118,6,0,120.0,3.327176,good,11411,0,12,20230618_100636_standard_comp_to_omission_D2_s...


# Labeling before or after

In [42]:
recording_to_spike_df.keys()

dict_keys(['20230618_100636_standard_comp_to_omission_D2_subj_1_4_t4b3L_box1_merged', '20230618_100636_standard_comp_to_omission_D2_subj_1_1_t1b2L_box2_merged', '20230620_114347_standard_comp_to_omission_D4_subj_1-1_t1b2L_box_2_merged', '20221122_161341_omission_subject_6_1_top_4_base_2', '20230620_114347_standard_comp_to_omission_D4_subj_1-2_t3b3L_box_1_merged', '20230619_115321_standard_comp_to_omission_D3_subj_1-4_t3b3L_box2_merged'])

In [43]:
all_trials_df = all_trials_df[all_trials_df["recording_file"].isin(recording_to_spike_df.keys())].reset_index(drop=True)

In [44]:
all_trials_df.head()

Unnamed: 0,time,state,recording_dir,recording_file,din,time_stamp_index,video_file,video_frame,video_number,subject_info,condition,competition_closeness,Unnamed: 13,resampled_index,all_subjects,current_subject,trial_outcome
0,1934930,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,1099250,20230618_100636_standard_comp_to_omission_D2_s...,1097,1.0,1_1_t1b2L_box2,1.4,Subj 2 Only,,54962,"[1.4, 1.1]",1.1,lose
1,1934930,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,1099250,20230618_100636_standard_comp_to_omission_D2_s...,1097,2.0,1_1_t1b2L_box2,1.4,Subj 2 Only,,54962,"[1.4, 1.1]",1.1,lose
2,4334936,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,3499256,20230618_100636_standard_comp_to_omission_D2_s...,3492,1.0,1_1_t1b2L_box2,1.1,Subj 1 Only,,174962,"[1.4, 1.1]",1.1,win
3,4334936,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,3499256,20230618_100636_standard_comp_to_omission_D2_s...,3492,2.0,1_1_t1b2L_box2,1.1,Subj 1 Only,,174962,"[1.4, 1.1]",1.1,win
4,6634931,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,5799251,20230618_100636_standard_comp_to_omission_D2_s...,5788,1.0,1_1_t1b2L_box2,1.1,Subj 1 Only,,289962,"[1.4, 1.1]",1.1,win


# Calculating the firing rate

In [45]:
all_trials_df.columns

Index(['time', 'state', 'recording_dir', 'recording_file', 'din',
       'time_stamp_index', 'video_file', 'video_frame', 'video_number',
       'subject_info', 'condition', 'competition_closeness', 'Unnamed: 13',
       'resampled_index', 'all_subjects', 'current_subject', 'trial_outcome'],
      dtype='object')

In [46]:
all_trials_df.head()

Unnamed: 0,time,state,recording_dir,recording_file,din,time_stamp_index,video_file,video_frame,video_number,subject_info,condition,competition_closeness,Unnamed: 13,resampled_index,all_subjects,current_subject,trial_outcome
0,1934930,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,1099250,20230618_100636_standard_comp_to_omission_D2_s...,1097,1.0,1_1_t1b2L_box2,1.4,Subj 2 Only,,54962,"[1.4, 1.1]",1.1,lose
1,1934930,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,1099250,20230618_100636_standard_comp_to_omission_D2_s...,1097,2.0,1_1_t1b2L_box2,1.4,Subj 2 Only,,54962,"[1.4, 1.1]",1.1,lose
2,4334936,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,3499256,20230618_100636_standard_comp_to_omission_D2_s...,3492,1.0,1_1_t1b2L_box2,1.1,Subj 1 Only,,174962,"[1.4, 1.1]",1.1,win
3,4334936,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,3499256,20230618_100636_standard_comp_to_omission_D2_s...,3492,2.0,1_1_t1b2L_box2,1.1,Subj 1 Only,,174962,"[1.4, 1.1]",1.1,win
4,6634931,1.0,20230618_100636_standard_comp_to_omission_D2_s...,20230618_100636_standard_comp_to_omission_D2_s...,dio_ECU_Din1,5799251,20230618_100636_standard_comp_to_omission_D2_s...,5788,1.0,1_1_t1b2L_box2,1.1,Subj 1 Only,,289962,"[1.4, 1.1]",1.1,win


In [47]:
grouped_trials_df = all_trials_df.groupby(["recording_file", "current_subject", "video_number"]).agg({
    'time_stamp_index': list,
    'trial_outcome': list,
    'competition_closeness': list}).reset_index()

In [48]:
grouped_trials_df.head()

Unnamed: 0,recording_file,current_subject,video_number,time_stamp_index,trial_outcome,competition_closeness
0,20230618_100636_standard_comp_to_omission_D2_s...,1.1,1.0,"[1099250, 3499256, 5799251, 7599250, 8699250, ...","[lose, win, win, lose, win, lose, lose, lose, ...","[Subj 2 Only, Subj 1 Only, Subj 1 Only, Subj 2..."
1,20230618_100636_standard_comp_to_omission_D2_s...,1.1,2.0,"[1099250, 3499256, 5799251, 7599250, 8699250, ...","[lose, win, win, lose, win, lose, lose, lose, ...","[Subj 2 Only, Subj 1 Only, Subj 1 Only, Subj 2..."
2,20230618_100636_standard_comp_to_omission_D2_s...,1.4,1.0,"[1099250, 3499256, 5799251, 7599250, 8699250, ...","[win, lose, lose, win, lose, win, win, win, lo...","[Subj 2 Only, Subj 1 Only, Subj 1 Only, Subj 2..."
3,20230618_100636_standard_comp_to_omission_D2_s...,1.4,2.0,"[1099250, 3499256, 5799251, 7599250, 8699250, ...","[win, lose, lose, win, lose, win, win, win, lo...","[Subj 2 Only, Subj 1 Only, Subj 1 Only, Subj 2..."
4,20230619_115321_standard_comp_to_omission_D3_s...,1.4,4.0,"[1277472, 3577468, 5377470, 6477469, 7477469, ...","[win, win, lose, win, lose, win, lose, lose, l...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


In [49]:
grouped_trials_df = grouped_trials_df.drop_duplicates(subset=["recording_file", "current_subject"], keep='first').reset_index(drop=True)

In [50]:
grouped_trials_df.head()

Unnamed: 0,recording_file,current_subject,video_number,time_stamp_index,trial_outcome,competition_closeness
0,20230618_100636_standard_comp_to_omission_D2_s...,1.1,1.0,"[1099250, 3499256, 5799251, 7599250, 8699250, ...","[lose, win, win, lose, win, lose, lose, lose, ...","[Subj 2 Only, Subj 1 Only, Subj 1 Only, Subj 2..."
1,20230618_100636_standard_comp_to_omission_D2_s...,1.4,1.0,"[1099250, 3499256, 5799251, 7599250, 8699250, ...","[win, lose, lose, win, lose, win, win, win, lo...","[Subj 2 Only, Subj 1 Only, Subj 1 Only, Subj 2..."
2,20230619_115321_standard_comp_to_omission_D3_s...,1.4,4.0,"[1277472, 3577468, 5377470, 6477469, 7477469, ...","[win, win, lose, win, lose, win, lose, lose, l...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
3,20230620_114347_standard_comp_to_omission_D4_s...,1.1,1.0,"[5757314, 7557336, 8657352, 9657364, 11557385,...","[lose, win, win, win, win, lose, win, win, los...","[Subj 2 blocking Subj 1, Subj 1 Only, Subj 1 t..."
4,20230620_114347_standard_comp_to_omission_D4_s...,1.2,1.0,"[5757314, 7557336, 8657352, 9657364, 11557385,...","[win, lose, lose, lose, lose, win, lose, lose,...","[Subj 2 blocking Subj 1, Subj 1 Only, Subj 1 t..."


## Seeing which tone time each unit time is closest to

- We will divide all the time up into bins of before and after each tone. Then we will see which time bin the neuron firing falls into.
    - So everything in the 0 bin would be before the first time bin. The 1 bin would be everything in between the first and second time bin. The last would be everything after last time bin.

In [51]:
recording_to_spike_df[list(recording_to_spike_df.keys())[0]].head()

Unnamed: 0,spike_clusters,spike_times,cluster_id,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id,recording_name
0,7,937,7,8.4668,2,0,40.0,2.642555,good,9063,0,8,20230618_100636_standard_comp_to_omission_D2_s...
1,25,998,25,19.984566,24,0,480.0,2.39005,good,8197,0,27,20230618_100636_standard_comp_to_omission_D2_s...
2,38,1125,38,14.402934,6,0,120.0,7.804621,good,26767,0,11,20230618_100636_standard_comp_to_omission_D2_s...
3,25,1327,25,19.984566,24,0,480.0,2.39005,good,8197,0,27,20230618_100636_standard_comp_to_omission_D2_s...
4,38,1346,38,14.402934,6,0,120.0,7.804621,good,26767,0,11,20230618_100636_standard_comp_to_omission_D2_s...


In [74]:
def find_closest(num, ordered_list):
    # Using a list comprehension to find the differences and then return the number with the smallest difference
    return min(ordered_list, key=lambda x: abs(x - num))

In [79]:
for recording, spike_df in recording_to_spike_df.items():
    try:
        current_recording = grouped_trials_df[grouped_trials_df["recording_file"] == recording].iloc[0]
        current_time_stamps = current_recording["time_stamp_index"]
        spike_df["closest_timestamp"] = spike_df["spike_times"].apply(lambda x: find_closest(x, current_time_stamps))
        spike_df["timestamp_difference"] = spike_df["spike_times"] - spike_df["closest_timestamp"] 
        
    except Exception as e:
        print(e)
        continue

single positional indexer is out-of-bounds


In [72]:
len(current_time_stamps)

25

In [80]:
spike_df

Unnamed: 0,spike_clusters,spike_times,cluster_id,amp,ch,channel_group,depth,fr,group,n_spikes,sh,si_unit_id,recording_name,trial_bin,closest_timestamp,timestamp_difference
0,44,946,44,9.739503,17,0,340.0,13.732511,good,28280,0,46,20230619_115321_standard_comp_to_omission_D3_s...,0,1277472,-1276526
1,14,1119,14,14.356270,6,0,120.0,6.044636,good,12448,0,16,20230619_115321_standard_comp_to_omission_D3_s...,0,1277472,-1276353
2,44,1148,44,9.739503,17,0,340.0,13.732511,good,28280,0,46,20230619_115321_standard_comp_to_omission_D3_s...,0,1277472,-1276324
3,15,1241,15,11.366614,6,0,120.0,8.128792,good,16740,0,17,20230619_115321_standard_comp_to_omission_D3_s...,0,1277472,-1276231
4,58,1799,58,22.194180,24,0,480.0,2.756700,good,5677,0,60,20230619_115321_standard_comp_to_omission_D3_s...,0,1277472,-1275673
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164609,16,38957244,16,14.943707,6,0,120.0,0.653605,good,1346,0,18,20230619_115321_standard_comp_to_omission_D3_s...,23,38477451,479793
164610,16,38957331,16,14.943707,6,0,120.0,0.653605,good,1346,0,18,20230619_115321_standard_comp_to_omission_D3_s...,23,38477451,479880
164611,15,38957511,15,11.366614,6,0,120.0,8.128792,good,16740,0,17,20230619_115321_standard_comp_to_omission_D3_s...,23,38477451,480060
164612,44,38957537,44,9.739503,17,0,340.0,13.732511,good,28280,0,46,20230619_115321_standard_comp_to_omission_D3_s...,23,38477451,480086


- Seeing if the firing time is within a 5 second interval before or 10 second after any tone

In [None]:
grouped_trials_df["good_unit_times"].iloc[0][0][0]

In [None]:
from collections import defaultdict

In [None]:
def within_interval(row):
    """
    """
    within_interval_unit_times = defaultdict(list)
    for index, time in enumerate(row['good_unit_times']):
        current_bin = row['good_unit_bins'][index]

        try:
            # checking number before
            first_tone_time = row['time_stamp_index'][current_bin - 1]
            if time >= first_tone_time - 5 * 20000 and time <= first_tone_time + 10 * 20000:
                within_interval_unit_times[first_tone_time].append(
                    (first_tone_time, index, time, row['good_unit_all_ids']))
    
            # checking number after
            second_tone_time = row['time_stamp_index'][current_bin]
            if time >= second_tone_time - 5 * 20000 and time <= second_tone_time + 10 * 20000:
                within_interval_unit_times[second_tone_time].append(
                    (second_tone_time, index, time, row['good_unit_all_ids']))
        except:
            pass
    
    return within_interval_unit_times



In [None]:
grouped_trials_df["within_interval"] = grouped_trials_df.apply(within_interval, axis=1)

In [None]:
grouped_trials_df["within_interval"].iloc[1]

In [None]:
within_interval_unit_times = defaultdict(list)
for index, time in enumerate(good_unit_times):
    try:
        current_bin = good_unit_bins[index]


        # checking number before
        first_tone_time = training_and_competition_trials[current_bin - 1]
        if time >= first_tone_time - 5 * 20000 and time <= first_tone_time + 10 * 20000:
            within_interval_unit_times[first_tone_time].append((first_tone_time, index, time, good_unit_all_ids[index]))

        # checking number after
        second_tone_time = training_and_competition_trials[current_bin]
        if time >= second_tone_time - 5 * 20000 and time <= second_tone_time + 10 * 20000:
            within_interval_unit_times[second_tone_time].append((second_tone_time, index, time, good_unit_all_ids[index]))
    except:
        pass

In [None]:
within_interval_unit_times[training_and_competition_trials[0]][:10]

In [None]:
within_interval_unit_times[training_and_competition_trials[-1]][:10]

## Seeing which 100ms bin each unit time belongs to

- Creating bins for each 100ms time interval

In [None]:
tone_unit_time_to_interval = {}
for time in training_and_competition_trials:
    tone_unit_time_to_interval[time] = [time + num * 2000 for num in range(-50, 100)]

In [None]:
len(tone_unit_time_to_interval[training_and_competition_trials[0]])

- Seeing which bin the tone time fits into

In [None]:
all_bin_mapping = None
for key, value in within_interval_unit_times.items():
    unit_fire_time = [fire_time for tone_time, index, fire_time, unit_id in value]
    unit_fire_bin = np.digitize(unit_fire_time, tone_unit_time_to_interval[key], right=True)
    current_tone_bin_map = np.hstack((np.array(value), unit_fire_bin[np.newaxis].T))
    if all_bin_mapping is None:
        all_bin_mapping = current_tone_bin_map
    else: 
        all_bin_mapping = np.vstack((all_bin_mapping, current_tone_bin_map))
    

In [None]:
all_bin_mapping

## Calculating Firing Rate

In [None]:
from collections import Counter, defaultdict

- Creating a reference for all the tone bin combinations to each tone
    - This is because we need a unique identifier for each tone and bin combination. So we will just add the numbers

In [None]:
tone_bin_for_index = all_bin_mapping[:,0] + all_bin_mapping[:,4]

In [None]:
tone_bin_to_tone = np.hstack((tone_bin_for_index[np.newaxis].T, all_bin_mapping[:,0][np.newaxis].T))

In [None]:
tone_bin_to_tone

In [None]:
tone_bin_to_tone = dict(zip(tone_bin_to_tone[:,0], tone_bin_to_tone[:,1]))

In [None]:
tone_bin_to_tone

In [None]:
tone_bin_for_index

- Getting the number of times each neuron fires

In [None]:
firing_counts = defaultdict(lambda: defaultdict(dict))
for tone_time, index, firing_time, unit_id, bin_index in all_bin_mapping:
    tone_bin = tone_time + bin_index
    if firing_counts[unit_id][tone_bin]:
        firing_counts[unit_id][tone_bin] += 1
    else:
        firing_counts[unit_id][tone_bin] = 1

In [None]:
firing_rate_df = pd.DataFrame.from_dict(firing_counts)

In [None]:
firing_rate_df = firing_rate_df.fillna(value=0)

In [None]:
firing_rate_df.head()

In [None]:
firing_rate_df.loc[:].values

- Classifying each time stamp

In [None]:
all_bin_mapping

In [None]:
all_bin_mapping[:, 0]

In [None]:
all_bin_mapping[:, 2]

- Making a dictionary that maps all the firing times to the corresponding tone

In [None]:
fire_to_tone_time = dict(zip(all_bin_mapping[:, 2], all_bin_mapping[:, 0]))

In [None]:
fire_to_tone_time

In [None]:
firing_rate_df = firing_rate_df.reset_index()

- Labeling which tone time that fire time belongs to

In [None]:
firing_rate_df["tone_time"] = firing_rate_df["index"].map(tone_bin_to_tone)

In [None]:
firing_rate_df.head()

In [None]:
firing_rate_df["bin"] = firing_rate_df["index"] - firing_rate_df["tone_time"]

In [None]:
training_and_competition_trials

In [None]:
len(training_and_competition_trials)

In [None]:
# iterating through the timestamp of each good unit spike

before_and_after_interval_unit_times = defaultdict(dict)

per_trial_number_of_samples = trial_one_way_duration * sampling_rate

for index, time in enumerate(good_unit_times):
    # getting the corresponding tone bin for the timestamp
    current_bin = good_unit_bins[index]
    current_neuron = good_unit_all_ids[index]   
    
    # checking number before
    first_tone_time = tone_time_stamps[(current_bin - 1) % len(tone_time_stamps)]
    # checking number after
    second_tone_time = tone_time_stamps[(current_bin) % len(tone_time_stamps)]    
    
    # checking if time is before first number
    if first_tone_time - per_trial_number_of_samples <= time <= first_tone_time:
        before_and_after_interval_unit_times[time]["tone_time"] = first_tone_time
        before_and_after_interval_unit_times[time]["before_or_after"] = "before"
        before_and_after_interval_unit_times[time]["neuron_id"] = good_unit_all_ids[index]
    # checking if time is after first number
    elif first_tone_time < time <= first_tone_time + per_trial_number_of_samples:
        before_and_after_interval_unit_times[time]["tone_time"] = first_tone_time
        before_and_after_interval_unit_times[time]["before_or_after"] = "after"
        before_and_after_interval_unit_times[time]["neuron_id"] = good_unit_all_ids[index]
    # checking if time is before second number
    if second_tone_time - per_trial_number_of_samples <= time <= second_tone_time:
        before_and_after_interval_unit_times[time]["tone_time"] = second_tone_time
        before_and_after_interval_unit_times[time]["before_or_after"] = "before"
        before_and_after_interval_unit_times[time]["neuron_id"] = good_unit_all_ids[index]
    # checking if time is after second number
    elif second_tone_time < time <= second_tone_time + per_trial_number_of_samples:
        before_and_after_interval_unit_times[time]["tone_time"] = second_tone_time
        before_and_after_interval_unit_times[time]["before_or_after"] = "after"
        before_and_after_interval_unit_times[time]["neuron_id"] = good_unit_all_ids[index]

In [None]:
recording_to_good_unit_times

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a4490980-3f6a-4f44-80eb-ebd789a5b21f' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>