In [1]:
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath('')))
from src.data_processing import *
import pandas as pd
import numpy as np

In [2]:
# Session folder; the Kilosort3 outputs are read from its "ephys/kilosort3" subfolder.
organized_data_folder = "/ceph/sjones/projects/sequence_squad/organised_data/animals/EJT178_implant1/recording7_30-03-2022/"
# Folder holding "trajectories.csv" and the cached .npy arrays (spikes, times, position) written by this notebook.
processed_data_folder = "/nfs/gatsbystor/nicholasg/striatal_replay/processed_data"
# Recording folder handed to get_ephys_timestamps() to obtain the ephys timestamps.
ephys_folder = "/ceph/sjones/projects/sequence_squad/data/raw_neuropixel/OE_DATA/EJT178/300322/2022-03-30_13-48-39/"

In [3]:
# Load the per-frame trajectory table; the first CSV column becomes the index.
trajectories_csv = os.path.join(processed_data_folder, "trajectories.csv")
trajectories = pd.read_csv(trajectories_csv, index_col=0)
trajectories

Unnamed: 0,trial_id,camera_idx,camera_time,ephys_time,linear_position,x_position,y_position,port1,port2
22191,1,22191,369.857173,4800.884355,0.000000,670.492310,513.852478,2,1
22192,1,22192,369.873841,4800.901010,-0.607927,671.156128,512.822449,2,1
22193,1,22193,369.890509,4800.917665,-0.010407,670.305847,513.338928,2,1
22194,1,22194,369.907178,4800.934320,0.082560,670.071716,513.164001,2,1
22195,1,22195,369.923846,4800.950974,0.010824,670.314087,513.453674,2,1
...,...,...,...,...,...,...,...,...,...
163228,276,163228,2720.473922,7150.310765,597.237558,650.354614,576.198364,3,7
163229,276,163229,2720.490593,7150.327440,606.095453,654.041809,570.472900,3,7
163230,276,163230,2720.507265,7150.344115,616.064659,655.980164,564.733582,3,7
163231,276,163231,2720.523936,7150.360789,627.483287,657.201050,558.478149,3,7


In [4]:
# One-off cell: builds the spike matrix consumed by the decoder and caches it as
# "spikes.npy" in processed_data_folder, so later sessions can load it instead of
# recomputing.
# NOTE(review): get_spike_matrix() only receives spike_clusters and cutoff, while
# times.npy is restricted to spikes inside trials -- confirm that the rows of this
# matrix line up with the entries of times.npy.

Fs = 30000.0  # divisor applied to the values stored in spike_times.npy
cutoff = 1_000_000  # forwarded to get_spike_matrix()

ephys_timestamps = get_ephys_timestamps(ephys_folder)

kilosort_dir = os.path.join(organized_data_folder, "ephys", "kilosort3")

# Stored spike times are divided by Fs, then offset by the first ephys timestamp.
spike_samples = np.load(os.path.join(kilosort_dir, "spike_times.npy")).squeeze()
spike_times = spike_samples / Fs + ephys_timestamps[0]
spike_clusters = np.load(os.path.join(kilosort_dir, "spike_clusters.npy")).squeeze()

spikes = get_spike_matrix(spike_clusters, cutoff)

spikes_path = os.path.join(processed_data_folder, "spikes.npy")
np.save(spikes_path, spikes)
display(spikes)

Processor ID: 102, Stream Name: 0, Line: 1 (main sync line))
  First event sample number: 60920380
  Last event sample number: 512017786
  Total sync events: 15036
  Sample rate: 30000


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [5]:
# One-off cell: collects the times of all spikes that fall inside a trial and caches
# them as "times.npy" in processed_data_folder, so later sessions can load the file
# instead of recomputing.

# First / last ephys timestamp of every trial.
trial_start_times = trajectories.groupby(["trial_id"]).agg("first")["ephys_time"]
trial_start_times.name = "start_time"
trial_end_times = trajectories.groupby(["trial_id"]).agg("last")["ephys_time"]
trial_end_times.name = "end_time"

trial_ephys_times = pd.concat([trial_start_times, trial_end_times], axis=1)

# Index of the first spike at or after each trial boundary. Unlike
# `(spike_times >= t).argmax()`, searchsorted returns len(spike_times) (not 0) when
# no spike lies at or after `t`, so a trial that outlasts the final spike keeps its
# spikes instead of yielding an empty slice.
# Assumes spike_times is sorted ascending (the previous argmax-based slicing relied
# on the same ordering).
window_starts = np.searchsorted(spike_times, trial_ephys_times["start_time"].to_numpy(), side="left")
window_ends = np.searchsorted(spike_times, trial_ephys_times["end_time"].to_numpy(), side="left")

# Gather per-trial slices and concatenate once; np.append inside the loop would
# recopy the whole accumulated array on every trial.
trial_chunks = []
n_collected = 0
for first_idx, stop_idx in zip(window_starts, window_ends):
    chunk = spike_times[first_idx:stop_idx]
    trial_chunks.append(chunk)
    n_collected += len(chunk)
    if n_collected > cutoff:
        # Enough spikes gathered; the excess is trimmed below.
        break

# Keep at most `cutoff` spike times.
times = np.concatenate(trial_chunks)[:cutoff] if trial_chunks else np.array([])

np.save(os.path.join(processed_data_folder, "times.npy"), times)
display(times)

array([4800.88493333, 4800.88536667, 4800.88553333, ..., 6471.92076667,
       6471.92086667, 6471.92093333])

In [6]:
# One-off cell: looks up the linear position at each spike time and caches the result
# as "position.npy" in processed_data_folder, so later sessions can load the file
# instead of recomputing.
#
# Each spike is assigned the linear_position of the trajectory row whose ephys_time
# is closest to it. The lookup is a vectorised binary search rather than a full scan
# of the trajectory per spike.

# Rows without an ephys_time can never be the nearest sample, so drop them up front.
timed_rows = trajectories[["ephys_time", "linear_position"]].dropna(subset=["ephys_time"])
traj_times = timed_rows["ephys_time"].to_numpy()
traj_positions = timed_rows["linear_position"].to_numpy()

# Binary search needs ascending times; a stable sort keeps duplicates in row order.
order = np.argsort(traj_times, kind="stable")
sorted_times = traj_times[order]

# Candidate neighbours on either side of every spike time, clipped to valid indices.
insert_at = np.searchsorted(sorted_times, times, side="left")
last_idx = len(sorted_times) - 1
left = np.clip(insert_at - 1, 0, last_idx)
right = np.clip(insert_at, 0, last_idx)

# Ties go to the earlier sample, matching argmin's "first minimum" behaviour.
pick_left = (times - sorted_times[left]) <= (sorted_times[right] - times)
nearest = np.where(pick_left, left, right)
# If several rows share the chosen timestamp, use the first of them (as argmin would).
nearest = np.searchsorted(sorted_times, sorted_times[nearest], side="left")

position = traj_positions[order[nearest]]

np.save(os.path.join(processed_data_folder, "position.npy"), position)
display(position)

100.00%

array([   0.        ,    0.        ,    0.        , ..., -265.65648489,
       -265.65648489, -265.65648489])

In [7]:
# Reload the cached arrays saved to processed_data_folder and show each one.
spikes, position, times = (
    np.load(os.path.join(processed_data_folder, cached_file))
    for cached_file in ("spikes.npy", "position.npy", "times.npy")
)
display(spikes, position, times)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

array([   0.        ,    0.        ,    0.        , ..., -265.65648489,
       -265.65648489, -265.65648489])

array([4800.88493333, 4800.88536667, 4800.88553333, ..., 6471.92076667,
       6471.92086667, 6471.92093333])