In [1]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, glob, os
import scipy.stats as stats, scipy.io as sio


### create spikes df

In [2]:
# pull in QC
df_preproc = pd.read_csv('../../results/202512/preproc.csv')

# get [clustIDs] where df[keep] = 0
dropped_clustIDs = df_preproc[df_preproc['keep_new'] != 1]['units: CL_'].tolist()
dropped_clustIDs.extend([0, 99999999])
print(dropped_clustIDs)

[596.0, 612.0, 1432.0, 1502.0, 1543.0, 506.0, 703.0, 636.0, 683.0, 1469.0, 2405.0, 2423.0, 2457.0, 2481.0, 1518.0, 1755.0, 1348.0, 1396.0, 1553.0, 1566.0, 1715.0, 1772.0, 976.0, 1042.0, 1045.0, 1433.0, 1799.0, 1825.0, 1953.0, 1712.0, 2185.0, 2769.0, 3203.0, 3227.0, 2308.0, 2323.0, 264.0, 571.0, 1998.0, 1747.0, 1901.0, 1921.0, 1585.0, 1736.0, 1558.0, 2261.0, 2280.0, 1663.0, 1678.0, 1373.0, 1882.0, 130.0, 147.0, 82.0, 117.0, 141.0, 163.0, 157.0, 172.0, 148.0, 151.0, 118.0, 156.0, 182.0, 188.0, 167.0, 174.0, nan, 0, 99999999]


In [3]:
def get_ID_spikes_dict(clustIDs, spikes):
    ''' return dict with keys=unique clusters, and vals = list of corresponding spikes '''
    
    ID_spikes_dict = {}
    for clustID, spike in zip(clustIDs, spikes):

        if clustID in dropped_clustIDs: continue

        if clustID not in ID_spikes_dict: ID_spikes_dict[clustID] = [] # init

        ID_spikes_dict[clustID].append(spike)

    return ID_spikes_dict

In [4]:
samp_rate = 1000000
spikes_df = []

data_dir = '../../results/202512/osort_mat/sort/5'
for channel_num in range(len(os.listdir(data_dir))):

    channel_mat = sio.loadmat(f'{data_dir}/A{channel_num+1}_sorted_new.mat')

    # get (1 * n_spikes) clusterID & spiketimes
    clust_IDs = channel_mat['assignedNegative'][0]
    spikes = channel_mat['newTimestampsNegative'][0]

    # get clusterID: [spikes]
    ID_spikes_dict = get_ID_spikes_dict(clust_IDs, spikes)

    # 1 row per clustID
    clust_df = pd.DataFrame([
        {
            "channel": channel_num + 1,
            "clustID": clustID,
            "spikes": spikes,
            "#spikes": len(spikes),
            "avgFR": len(spikes) / ((spikes[-1] - spikes[0]) / samp_rate),
        }
        for clustID, spikes in ID_spikes_dict.items()
    ])
    spikes_df.append(clust_df)

    # if channel_num > 10: break

spikes_df = pd.concat(spikes_df, ignore_index=True)


In [5]:
print(len(spikes_df))
spikes_df

23


Unnamed: 0,channel,clustID,spikes,#spikes,avgFR
0,2,1583,"[4070566.666666667, 12215633.333333334, 123644...",5454,3.372888
1,5,952,"[18244333.333333336, 20953133.333333336, 21336...",1865,1.176957
2,6,2460,"[2239500.0, 2703900.0, 17368133.333333336, 180...",5565,3.437268
3,6,2477,"[18131033.333333336, 18393666.666666668, 18548...",10843,6.764394
4,6,2475,"[21893966.666666668, 23410966.666666668, 32298...",2432,1.520609
5,7,1466,"[17218533.333333336, 18210833.333333336, 18423...",3834,2.406596
6,8,2410,"[2873333.3333333335, 3584833.3333333335, 38527...",2370,1.465269
7,8,2349,"[5895300.0, 13255100.000000002, 17287733.33333...",8883,5.499847
8,9,1938,"[612466.6666666667, 782133.3333333334, 1033233...",8664,5.346766
9,9,1892,"[2822233.3333333335, 4503333.333333334, 561126...",3353,2.076493


### psychopy

In [6]:
psychopy_df = pd.read_csv(glob.glob('../../results/psychopy/*202512*.csv')[0])[:-1]
psychopy_df

Unnamed: 0,thisN,thisTrialN,thisRepN,blockN,run,condition,trial_key,stim_file_pos,stim_pos,noise_pos,...,blocks.block_end_resp.duration,subj,difficulty,sess_type,date,expName,psychopyVersion,frameRate,expStart,Unnamed: 112
0,0.0,0.0,0.0,1.0,1.0,baseline,21.0,2.1,0.02,3.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,
1,1.0,1.0,0.0,1.0,1.0,baseline,35.0,3.5,0.30,3.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,
2,2.0,2.0,0.0,1.0,1.0,baseline,26.0,2.7,0.14,3.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,
3,3.0,3.0,0.0,1.0,1.0,baseline,5.0,0.5,-0.30,1.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,
4,4.0,4.0,0.0,1.0,1.0,baseline,37.0,3.7,0.34,3.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,35.0,35.0,0.0,6.0,2.0,flat_comp,228.0,3.5,0.30,1.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,
236,36.0,36.0,0.0,6.0,2.0,flat_comp,201.0,1.1,-0.18,3.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,
237,37.0,37.0,0.0,6.0,2.0,flat_comp,237.0,3.9,0.38,1.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,
238,38.0,38.0,0.0,6.0,2.0,flat_comp,217.0,2.7,0.14,3.0,...,,202512.0,patients,C,2025-07-26_12h47.29.109,asymmetry_final,2024.2.4,60.0,2025-07-26 12h47.44.103153 -0600,


In [7]:
phases = ['baseline', 'stim', 'delay', 'task', 'anticipation', 'feedback']
phase_bounds_dict = {}

for phase in phases:

    # e.g., baseline: ['baseline.started', 'baseline.stopped']
    phase_bounds_dict[phase] = [f'{phase}.started', f'{phase}.stopped']

    # optional. sid, check
    if phase == 'stim': phase_bounds_dict[phase] = [f'target_stim.started', f'{phase}.stopped']
    if phase == 'feedback': phase_bounds_dict[phase] = [f'no_resp_text.started', f'{phase}.stopped']

phase_cols = []
for key, val in phase_bounds_dict.items():
    phase_cols.extend(val)
    print(f'{key}: {val}')

psychopy_df[phase_cols]

baseline: ['baseline.started', 'baseline.stopped']
stim: ['target_stim.started', 'stim.stopped']
delay: ['delay.started', 'delay.stopped']
task: ['task.started', 'task.stopped']
anticipation: ['anticipation.started', 'anticipation.stopped']
feedback: ['no_resp_text.started', 'feedback.stopped']


Unnamed: 0,baseline.started,baseline.stopped,target_stim.started,stim.stopped,delay.started,delay.stopped,task.started,task.stopped,anticipation.started,anticipation.stopped,no_resp_text.started,feedback.stopped
0,34.604457,35.489340,35.500442,36.483524,36.483904,37.967865,37.969383,40.001099,40.001473,40.250284,40.266833,41.250330
1,41.250907,42.234557,42.250238,43.233700,43.234072,44.669215,44.670643,46.234718,46.235103,46.483779,46.500337,47.483829
2,47.484361,48.370060,48.383728,49.367172,49.367572,50.802966,50.804329,52.868121,52.868499,53.117306,53.133851,54.117367
3,54.117896,55.201769,55.217254,56.200705,56.201083,57.684797,57.686062,59.318274,59.318649,59.567454,59.584023,60.567499
4,60.568024,61.652020,61.667405,62.650888,62.651274,63.986543,63.987762,65.368478,65.368850,65.617640,65.634198,66.617662
...,...,...,...,...,...,...,...,...,...,...,...,...
235,1547.058425,1547.942015,1547.957796,1548.941241,1548.941616,1550.376709,1550.377940,1552.308798,1552.309160,1552.558072,1552.574577,1553.558074
236,1553.558603,1554.493811,1554.507907,1555.491422,1555.491802,1556.826150,1556.827339,1558.492330,1558.492705,1558.741499,1558.758047,1559.741539
237,1559.742073,1560.727593,1560.741456,1561.724937,1561.725308,1563.010160,1563.011427,1564.192590,1564.192956,1564.441677,1564.458230,1565.441714
238,1565.442237,1566.377649,1566.391613,1567.375087,1567.375467,1568.709188,1568.710407,1569.959316,1569.959689,1570.208482,1570.225047,1571.208538


In [8]:
phase_spikes_cols = [f'{phase}_spikes' for phase in phases]
phase_aligned_spikes_cols = [f'{phase}_aligned_spikes' for phase in phases]
phase_spikes_cols, phase_aligned_spikes_cols

(['baseline_spikes',
  'stim_spikes',
  'delay_spikes',
  'task_spikes',
  'anticipation_spikes',
  'feedback_spikes'],
 ['baseline_aligned_spikes',
  'stim_aligned_spikes',
  'delay_aligned_spikes',
  'task_aligned_spikes',
  'anticipation_aligned_spikes',
  'feedback_aligned_spikes'])

In [9]:
# prep
# bad = psychopy_df['invalid'] | psychopy_df['missed']
# psychopy_df = psychopy_df[~bad].copy()

phases = ['baseline','stim','delay','task','anticipation','feedback']
win = {p: (psychopy_df[f'{p}.started'].to_numpy(),
           psychopy_df[f'{p}.stopped'].to_numpy()) for p in phases}

out_rows = []
for n, spk in spikes_df.groupby('clustID'):
    times = np.asarray(spk['spikes'].values)  # sorted
    for p in phases:
        s, e = win[p]
        # per-trial slices via searchsorted
        lo = np.searchsorted(times, s, side='left')
        hi = np.searchsorted(times, e, side='right')
        for tr, (i0, i1) in enumerate(zip(lo, hi)):
            tseg = times[i0:i1]
            out_rows.append(dict(
                trial_id=psychopy_df.index[tr],
                neuron=n,
                phase=p,
                spike_times=tseg,
                spike_times_aligned=tseg - s[tr],
                n_spikes=len(tseg),
                phase_dur=(e[tr]-s[tr]),
                fr=len(tseg)/(e[tr]-s[tr]) if (e[tr]>s[tr]) else np.nan
            ))

df_phase = pd.DataFrame(out_rows)


TypeError: '<' not supported between instances of 'list' and 'float'