In [14]:
import os
import subprocess
import h5py
from scipy.io import loadmat
import numpy as np
from preproc import *
import pickle

In [15]:
# Specify which sessions' dataset to use
prefix = "/Volumes/Hippocampus/Data/picasso-misc/"
month_list = ["201811", "201810", "201809", "201808", "201807"]

# Save directory for data files
save_dir = "data/combined"

# Whether to overwrite preexisting files
overwrite = True

In [16]:
# Get list of days from list of months
day_list = list()
for month in month_list:
    result = subprocess.run(f'find {prefix} -type d -maxdepth 1 -name "{month}*" | cut -f 7 -d "/"', stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True)
    day_list.extend(result.stdout.split('\n'))
day_list = list(filter(lambda str: str, day_list))
day_list.sort(reverse=True)

# Filter out these 4 days
blacklist = ["20180927", "20180821", "20180816", "20180711"]
for day in blacklist:
    if day in day_list:
        day_list.remove(day)

# Create directory to save data files to
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

In [17]:
# Save list of days as a txt file
with open(f'{save_dir}/days.txt', 'w') as file:
    for day in day_list:
        file.write(f'{day}\n')

In [18]:
# For processing hint/nav end phase data

# Grab periods in the navigation phase when the hint image was looked at
# As well as which goal it corresponded to
def chunk_into_intervals(timepts: np.array, tbin: float=0.001) -> np.array:
    intervals = list()
    i, st = 0, None
    while i < timepts.shape[0]:
        curr = timepts[i]
        if st is None:
            st = curr
        elif curr > prev + 1.1 * tbin:
            intervals.append(np.array([st, prev+tbin]))
            st = curr
        prev = curr
        i += 1
    if st is not None:
        intervals.append(np.array([st, prev+tbin]))
    return np.array(intervals)

# View bins for all of the posters on the pillar walls
poster_viewbins = np.concatenate([np.arange(4533,4536+1), np.arange(4565,4568+1), np.arange(4597,4600+1), np.arange(4685,4688+1), np.arange(4717,4720+1), np.arange(4749,4752+1),\
                                    np.arange(4701,4704+1), np.arange(4733,4736+1), np.arange(4765,4768+1), np.arange(4845,4848+1), np.arange(4877,4480+1), np.arange(4909,4912+1),\
                                        np.arange(4861,4864+1), np.arange(4893,4896+1), np.arange(4925,4928+1), np.arange(4997,5000+1), np.arange(5029,5032+1), np.arange(5061,5064+1)])
poster_viewbins = set(poster_viewbins)

In [19]:
for day_dir in day_list:
    if os.path.exists(f'{save_dir}/{day_dir}_data.pkl') and not overwrite:
        continue

    ### Load in data from files ###

    # Get list of cells under the day directory
    os.system(f"sh ~/Documents/neural_decoding/Hippocampus_Decoding/get_cells.sh {day_dir}")
    cell_list = list()
    with open("cell_list.txt", "r") as file:
        for line in file.readlines():
            cell_list.append(line.strip())
    os.system("rm cell_list.txt")

    # Load data from rplparallel.mat object, extract trial markers, time stamps and session start timestamp
    rp_file = h5py.File(prefix + day_dir + "/session01/rplparallel.mat")
    rp = rp_file.get('rp').get('data')
    trial_markers = np.array(rp.get('markers'))
    trial_timestamps = np.round(np.array(rp.get('timeStamps')), 3)
    session_start_time = np.round(np.array(rp.get('session_start_sec'))[0,0], 3)
    rp_file.close()

    # Load data and extract spike times from all spiketrain.mat objects
    spike_times = list()
    cell_labels = list()
    for cell_dir in cell_list:
        try:
            spk_file = loadmat(prefix + day_dir + "/session01/" + cell_dir + "/spiketrain.mat")
        except NotImplementedError:
            spk_file = h5py.File(prefix + day_dir + "/session01/" + cell_dir + "/spiketrain.mat")
        except FileNotFoundError:
            continue
        spk = np.array(spk_file.get('timestamps')).flatten() # spike timestamps is loaded in as a column vector
        spk /= 1000 # convert spike timestamps from msec to sec
        spike_times.append(spk)
        if isinstance(spk_file, h5py.File):
            spk_file.close()
        
        cell_name = cell_dir.split('/')
        array, channel, cell = cell_name[0][6:], cell_name[1][7:], cell_name[2][5:]
        if channel[0] == '0':
            channel = channel[1:]
        cell_labels.append(f'a{array}/ch{channel}/c{cell}')

    # Load data from vmpv.mat object, extract session end timestamp
    pv_file = h5py.File(prefix + day_dir + "/session01/1vmpv.mat")
    pv = pv_file.get('pv').get('data')
    session_end_time = np.round(np.array(pv.get('rplmaxtime'))[0,0], 3)
    pv_file.close()

    # Load data from binData.hdf object, extract view bin data
    viewbin_data = np.genfromtxt(prefix + day_dir + "/session01/1binData.csv", delimiter=',')
    viewbin_data[:,0] /= 1000 # convert eyelink timestamps from msec to sec


    ### Data preprocessing ###

    # Define important constants
    num_cells = len(cell_labels)
    num_goals = 6
    time_res = 0.001
    tbin_size = 0.1

    # Get trial outcomes from trial markers
    # 0 for unsuccessful trials, 1 for successful trials
    trial_outcomes = 4 - trial_markers[2,:] // 10
    trial_outcomes = trial_outcomes.astype(int)

    # Get poster numbers from trial markers, cue phase time intervals
    trial_markers = trial_markers[0,:] % 10
    trial_markers = trial_markers.astype(int)
    cue_intervals = trial_timestamps[0:2,:].T

    # Get durations of each navigation phase
    nav_intervals = trial_timestamps[1:,:].T
    nav_durations = nav_intervals[:,1] - nav_intervals[:,0]

    # Generate time intervals for each trial
    trial_intervals = np.empty_like(cue_intervals)
    trial_intervals[:,0] = cue_intervals[:,1]
    trial_intervals[:-1,1] = cue_intervals[1:,0]
    trial_intervals[-1,1] = session_end_time

    # Generate time intervals for each inter-trial interval
    iti_intervals = np.empty_like(cue_intervals)
    iti_intervals[:,0] = nav_intervals[:,1]
    iti_intervals[:-1,1] = cue_intervals[1:,0]
    iti_intervals[-1,1] = session_end_time

    # Further differentiate trial markers into trial trajectories (start poster, end poster)
    trial_trajectories = np.zeros((trial_markers.shape[0], 2))
    trial_trajectories[:,1] = trial_markers
    trial_trajectories[1:,0] = trial_markers[:-1]

    # Filter out trials that are too long (> 25 seconds) or have repeated goal from previous trial
    good_trials = np.ones(trial_markers.shape, dtype=np.int8)
    max_dur = 25  # maximum duration of trials (in seconds) to filter out
    prev_goal = 0
    for num, dur in enumerate(nav_durations):
        curr_goal = trial_markers[num]
        if dur > max_dur or curr_goal == prev_goal:
            good_trials[num] = 0
        prev_goal = curr_goal
    trial_filt = np.where(good_trials == 1)

    trial_outcomes = trial_outcomes[trial_filt]
    trial_markers = trial_markers[trial_filt]
    cue_intervals = cue_intervals[trial_filt]
    nav_intervals = nav_intervals[trial_filt]
    nav_durations = nav_durations[trial_filt]
    trial_intervals = trial_intervals[trial_filt]
    iti_intervals = iti_intervals[trial_filt]
    trial_trajectories = trial_trajectories[trial_filt]


    ### Cue phase, 100 ms time bins ###

    # Preallocate list of arrays for spike counts per time bin in cue phase
    # Then slot spikes into cue phase intervals, using time bin resolution specified by tbin_size
    timebins_cue, spikecounts_cue = list(), list()
    for trial in cue_intervals:
        st_time, ed_time = trial
        ed_time = st_time + one_second
        timebins_cue_trial = np.hstack([np.arange(st_time, ed_time, tbin_size).reshape(-1,1), np.arange(st_time, ed_time, tbin_size).reshape(-1,1) + tbin_size])
        timebins_cue.append(timebins_cue_trial)
        spikecounts_cue.append(np.array(spike_counts_per_observation(timebins_cue_trial, spike_times)))

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_cue = [trial/tbin_size for trial in spikecounts_cue]

    # Filter out trials with nans in cue spike rates
    goals_cue = trial_markers.copy()
    trajectories_cue = trial_trajectories.copy()

    # Package variables for saving
    cue_100ms = {'spikerates_cue': spikerates_cue, 'goals_cue': goals_cue, 'trajectories_cue': trajectories_cue}


    ### Cue phase, mean over cue image views ###

    # Extract periods in cue phase where cue image was not looked at
    # Preallocate list of arrays for spike counts per time bin in cue phase
    cueimg_filter = list()
    timebins_cue = list()
    spikecounts_cue = list()
    for trial in cue_intervals:
        st_time, ed_time = trial
        viewbin_data_trial = viewbin_data[(viewbin_data[:,0] >= st_time) & (viewbin_data[:,0] < ed_time)]
        cueimg_filter.append(viewbin_data_trial[:,1] == 1)
        timebins_cue.append(np.hstack([viewbin_data_trial[:,0].reshape(-1,1), viewbin_data_trial[:,0].reshape(-1,1)+time_res]))
        spikecounts_cue.append(np.zeros((viewbin_data_trial.shape[0], num_cells)))

    # Slot spikes into cue phase intervals, using time bin resolution same as eyelink/raycasting data (i.e. 1 ms)
    # Also filter out time bins where cue image was not looked at
    for i, trial in enumerate(timebins_cue):
        spikecounts_cue[i] = np.array(spike_counts_per_observation(trial, spike_times))
        spikecounts_cue[i] = spikecounts_cue[i][cueimg_filter[i]]

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_cue = np.empty((len(spikecounts_cue), num_cells))
    spikerates_cue[:,:] = np.nan
    for i, trial in enumerate(spikecounts_cue):
        if timebins_cue[i].shape[0] > 0:
            spikerates_cue[i,:] = np.sum(trial, axis=0)/(time_res*timebins_cue[i].shape[0])

    # Filter out trials with nans in cue spike rates
    goals_cue = trial_markers.copy()
    trajectories_cue = trial_trajectories.copy()
    cue_filter = np.all(np.isfinite(spikerates_cue),axis=1)
    spikerates_cue = spikerates_cue[cue_filter,:]
    goals_cue = goals_cue[cue_filter]
    trajectories_cue = trajectories_cue[cue_filter]

    # Package variables for saving
    cue_mean = {'spikerates_cue': spikerates_cue, 'goals_cue': goals_cue, 'trajectories_cue': trajectories_cue}


    ### Hint views, mean over hint image views ###

    timebins_hints = list()
    spikecounts_hints = list()
    for trial in nav_intervals:
        st_time, ed_time = trial
        viewbin_data_trial = viewbin_data[(viewbin_data[:,0] >= st_time) & (viewbin_data[:,0] < ed_time)]
        hint_periods_trial = chunk_into_intervals(viewbin_data_trial[viewbin_data_trial[:,1] == 2, 0])
        timebins_hints.append(hint_periods_trial)
        spikecounts_hints.append(np.zeros((hint_periods_trial.shape[0], num_cells)))

    # Slot spikes into hint viewing periods
    for i, trial in enumerate(timebins_hints):
        spikecounts_hints[i] = np.array(spike_counts_per_observation(trial, spike_times))

    # Convert spike counts to average spike rates across all hint viewing instances per trial
    spikerates_hints = np.empty((len(spikecounts_hints), num_cells))
    spikerates_hints[:,:] = np.nan
    for i, trial in enumerate(spikecounts_hints):
        if timebins_hints[i].shape[0] > 0:
            spikerates_hints[i,:] = np.sum(trial, axis=0)/np.sum(timebins_cue[i][:,1]-timebins_cue[i][:,0])
    goals_hints = trial_markers.copy()
    trajectories_hints = trial_trajectories.copy()

    # Filter out trials with nans in hint spike rates
    hints_filter = np.all(np.isfinite(spikerates_hints),axis=1)
    spikerates_hints = spikerates_hints[hints_filter,:]
    goals_hints = goals_hints[hints_filter]
    trajectories_hints = trajectories_hints[hints_filter]

    # Package variables for saving
    hint_mean = {'spikerates_hints': spikerates_hints, 'goals_hints': goals_hints, 'trajectories_hints': trajectories_hints}


    ### Nav end phase, mean over poster views ###

    # Get last 1 second of navigation phases, but filter out unsuccessful trials
    one_second = 1
    successful_trials = np.where(trial_outcomes == 1)

    # Filter out time points in the 1 second where the poster was not looked at
    poster_filter = list()
    timebins_navend = list()
    spikecounts_navend = list()
    for trial in nav_intervals:
        st_time, ed_time = trial
        st_time = ed_time - one_second
        viewbin_data_trial = viewbin_data[(viewbin_data[:,0] >= st_time) & (viewbin_data[:,0] < ed_time)]
        poster_filter.append(np.array([viewbin_data_trial[i,1] in poster_viewbins for i in range(viewbin_data_trial.shape[0])]))
        timebins_navend.append(np.hstack([viewbin_data_trial[:,0].reshape(-1,1), viewbin_data_trial[:,0].reshape(-1,1)+tbin_size]))
        spikecounts_navend.append(np.zeros((viewbin_data_trial.shape[0], num_cells)))

    # Slot spikes into navend intervals, using time bin resolution same as eyelink/raycasting data (i.e. 1 ms)
    # Also filter out time bins where poster was not looked at
    for i, trial in enumerate(timebins_navend):
        spikecounts_navend[i] = np.array(spike_counts_per_observation(trial, spike_times))
        if poster_filter[i].shape[0] > 0:
            spikecounts_navend[i] = spikecounts_navend[i][poster_filter[i]]

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_navend = np.empty((len(spikecounts_navend), num_cells))
    spikerates_navend[:,:] = np.nan
    for i, trial in enumerate(spikecounts_navend):
        if timebins_navend[i].shape[0] > 0:
            spikerates_navend[i,:] = np.sum(trial, axis=0)/(tbin_size*timebins_navend[i].shape[0])

    # Filter out trials with nans in navend spike rates
    goals_navend = trial_markers.copy()
    trajectories_navend = trial_trajectories.copy()
    navend_filter = np.all(np.isfinite(spikerates_navend),axis=1)
    spikerates_navend = spikerates_navend[navend_filter,:]
    goals_navend = goals_navend[navend_filter]
    trajectories_navend = trajectories_navend[navend_filter]

    # Package variables for saving
    navend_mean = {'spikerates_navend': spikerates_navend, 'goals_navend': goals_navend, 'trajectories_navend': trajectories_navend}


    ### Inter-trial intervals, 100ms time bins ###

    # Preallocate list of arrays for spike counts per time bin in cue phase
    # Then slot spikes into cue phase intervals, using time bin resolution specified by tbin_size
    timebins_iti, spikecounts_iti = list(), list()
    for trial in iti_intervals:
        st_time, ed_time = trial
        st_time = ed_time - 2 * one_second
        timebins_iti_trial = np.hstack([np.arange(st_time, ed_time, tbin_size).reshape(-1,1), np.arange(st_time, ed_time, tbin_size).reshape(-1,1) + tbin_size])
        timebins_iti.append(timebins_iti_trial)
        spikecounts_iti.append(np.array(spike_counts_per_observation(timebins_iti_trial, spike_times)))

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_iti = [trial/tbin_size for trial in spikecounts_iti]

    # Filter out trials with nans in cue spike rates
    goals_iti = trial_markers.copy()
    trajectories_iti = trial_trajectories.copy()

    # Package variables for saving
    iti_100ms = {'spikerates_iti': spikerates_iti, 'goals_iti': goals_iti, 'trajectories_iti': trajectories_iti}


    ### Nav end phase, 100ms time bins ###

    # Preallocate list of arrays for spike counts per time bin in cue phase
    # Then slot spikes into cue phase intervals, using time bin resolution specified by tbin_size
    timebins_navend, spikecounts_navend = list(), list()
    for trial in nav_intervals:
        st_time, ed_time = trial
        st_time = ed_time - one_second
        timebins_navend_trial = np.hstack([np.arange(st_time, ed_time, tbin_size).reshape(-1,1), np.arange(st_time, ed_time, tbin_size).reshape(-1,1) + tbin_size])
        timebins_navend.append(timebins_navend_trial)
        spikecounts_navend.append(np.array(spike_counts_per_observation(timebins_navend_trial, spike_times)))

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_navend = [trial/tbin_size for trial in spikecounts_navend]

    # Filter out trials with nans in cue spike rates
    goals_navend = trial_markers.copy()
    trajectories_navend = trial_trajectories.copy()

    # Package variables for saving
    navend_100ms = {'spikerates_navend': spikerates_navend, 'goals_navend': goals_navend, 'trajectories_navend': trajectories_navend}


    ### Nav start phase, 100ms time bins ###

    # Preallocate list of arrays for spike counts per time bin in cue phase
    # Then slot spikes into cue phase intervals, using time bin resolution specified by tbin_size
    timebins_navst, spikecounts_navst = list(), list()
    for trial in nav_intervals:
        st_time, ed_time = trial
        ed_time = st_time + one_second
        timebins_navst_trial = np.hstack([np.arange(st_time, ed_time, tbin_size).reshape(-1,1), np.arange(st_time, ed_time, tbin_size).reshape(-1,1) + tbin_size])
        timebins_navst.append(timebins_navst_trial)
        spikecounts_navst.append(np.array(spike_counts_per_observation(timebins_navst_trial, spike_times)))

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_navst = [trial/tbin_size for trial in spikecounts_navst]

    # Filter out trials with nans in cue spike rates
    goals_navst = trial_markers.copy()
    trajectories_navst = trial_trajectories.copy()

    # Package variables for saving
    navst_100ms = {'spikerates_navst': spikerates_navst, 'goals_navst': goals_navst, 'trajectories_navst': trajectories_navst}


    ### Save processed data to pkl file ###
    for i, label in enumerate(cell_labels):
        label = label.split('/')
        label[1] = label[1][2:]
        if label[1][0] == '0':
            label[1] = label[1][1:]
        cell_labels[i] = f'{day_dir}ch{label[1]}{label[2]}'

    data = {'cue_mean': cue_mean, 'hint_mean': hint_mean, 'navend_mean': navend_mean, 'cue_100ms': cue_100ms, 'navst_100ms': navst_100ms, 'navend_100ms': navend_100ms, 'iti_100ms': iti_100ms,\
            'cell_labels': cell_labels}
    with open(f'{save_dir}/{day_dir}_data.pkl', 'wb') as file:
        pickle.dump(data, file)

  spikerates_hints[i,:] = np.sum(trial, axis=0)/np.sum(timebins_cue[i][:,1]-timebins_cue[i][:,0])
  spikerates_hints[i,:] = np.sum(trial, axis=0)/np.sum(timebins_cue[i][:,1]-timebins_cue[i][:,0])


In [21]:
for day_dir in day_list:
    if os.path.exists(f'{save_dir}_50ms/{day_dir}_data.pkl') and not overwrite:
        continue

    ### Load in data from files ###

    # Get list of cells under the day directory
    os.system(f"sh ~/Documents/neural_decoding/Hippocampus_Decoding/get_cells.sh {day_dir}")
    cell_list = list()
    with open("cell_list.txt", "r") as file:
        for line in file.readlines():
            cell_list.append(line.strip())
    os.system("rm cell_list.txt")

    # Load data from rplparallel.mat object, extract trial markers, time stamps and session start timestamp
    rp_file = h5py.File(prefix + day_dir + "/session01/rplparallel.mat")
    rp = rp_file.get('rp').get('data')
    trial_markers = np.array(rp.get('markers'))
    trial_timestamps = np.round(np.array(rp.get('timeStamps')), 3)
    session_start_time = np.round(np.array(rp.get('session_start_sec'))[0,0], 3)
    rp_file.close()

    # Load data and extract spike times from all spiketrain.mat objects
    spike_times = list()
    cell_labels = list()
    for cell_dir in cell_list:
        try:
            spk_file = loadmat(prefix + day_dir + "/session01/" + cell_dir + "/spiketrain.mat")
        except NotImplementedError:
            spk_file = h5py.File(prefix + day_dir + "/session01/" + cell_dir + "/spiketrain.mat")
        except FileNotFoundError:
            continue
        spk = np.array(spk_file.get('timestamps')).flatten() # spike timestamps is loaded in as a column vector
        spk /= 1000 # convert spike timestamps from msec to sec
        spike_times.append(spk)
        if isinstance(spk_file, h5py.File):
            spk_file.close()
        
        cell_name = cell_dir.split('/')
        array, channel, cell = cell_name[0][6:], cell_name[1][7:], cell_name[2][5:]
        if channel[0] == '0':
            channel = channel[1:]
        cell_labels.append(f'a{array}/ch{channel}/c{cell}')

    # Load data from vmpv.mat object, extract session end timestamp
    pv_file = h5py.File(prefix + day_dir + "/session01/1vmpv.mat")
    pv = pv_file.get('pv').get('data')
    session_end_time = np.round(np.array(pv.get('rplmaxtime'))[0,0], 3)
    pv_file.close()


    ### Data preprocessing ###

    # Define important constants
    num_cells = len(cell_labels)
    num_goals = 6
    time_res = 0.001
    tbin_size = 0.05

    # Get trial outcomes from trial markers
    # 0 for unsuccessful trials, 1 for successful trials
    trial_outcomes = 4 - trial_markers[2,:] // 10
    trial_outcomes = trial_outcomes.astype(int)

    # Get poster numbers from trial markers, cue phase time intervals
    trial_markers = trial_markers[0,:] % 10
    trial_markers = trial_markers.astype(int)
    cue_intervals = trial_timestamps[0:2,:].T

    # Get durations of each navigation phase
    nav_intervals = trial_timestamps[1:,:].T
    nav_durations = nav_intervals[:,1] - nav_intervals[:,0]

    # Generate time intervals for each trial
    trial_intervals = np.empty_like(cue_intervals)
    trial_intervals[:,0] = cue_intervals[:,1]
    trial_intervals[:-1,1] = cue_intervals[1:,0]
    trial_intervals[-1,1] = session_end_time

    # Generate time intervals for each inter-trial interval
    iti_intervals = np.empty_like(cue_intervals)
    iti_intervals[:,0] = nav_intervals[:,1]
    iti_intervals[:-1,1] = cue_intervals[1:,0]
    iti_intervals[-1,1] = session_end_time

    # Further differentiate trial markers into trial trajectories (start poster, end poster)
    trial_trajectories = np.zeros((trial_markers.shape[0], 2))
    trial_trajectories[:,1] = trial_markers
    trial_trajectories[1:,0] = trial_markers[:-1]

    # Filter out trials that are too long (> 25 seconds) or have repeated goal from previous trial
    good_trials = np.ones(trial_markers.shape, dtype=np.int8)
    max_dur = 25  # maximum duration of trials (in seconds) to filter out
    prev_goal = 0
    for num, dur in enumerate(nav_durations):
        curr_goal = trial_markers[num]
        if dur > max_dur or curr_goal == prev_goal:
            good_trials[num] = 0
        prev_goal = curr_goal
    trial_filt = np.where(good_trials == 1)

    trial_outcomes = trial_outcomes[trial_filt]
    trial_markers = trial_markers[trial_filt]
    cue_intervals = cue_intervals[trial_filt]
    nav_intervals = nav_intervals[trial_filt]
    nav_durations = nav_durations[trial_filt]
    trial_intervals = trial_intervals[trial_filt]
    iti_intervals = iti_intervals[trial_filt]
    trial_trajectories = trial_trajectories[trial_filt]


    ### Cue phase, 50 ms time bins ###

    # Preallocate list of arrays for spike counts per time bin in cue phase
    # Then slot spikes into cue phase intervals, using time bin resolution specified by tbin_size
    timebins_cue, spikecounts_cue = list(), list()
    for trial in cue_intervals:
        st_time, ed_time = trial
        ed_time = st_time + one_second
        timebins_cue_trial = np.hstack([np.arange(st_time, ed_time, tbin_size).reshape(-1,1), np.arange(st_time, ed_time, tbin_size).reshape(-1,1) + tbin_size])
        timebins_cue.append(timebins_cue_trial)
        spikecounts_cue.append(np.array(spike_counts_per_observation(timebins_cue_trial, spike_times)))

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_cue = [trial/tbin_size for trial in spikecounts_cue]

    # Filter out trials with nans in cue spike rates
    goals_cue = trial_markers.copy()
    trajectories_cue = trial_trajectories.copy()

    # Package variables for saving
    cue_50ms = {'spikerates_cue': spikerates_cue, 'goals_cue': goals_cue, 'trajectories_cue': trajectories_cue}


    ### Inter-trial intervals, 50ms time bins ###

    # Preallocate list of arrays for spike counts per time bin in cue phase
    # Then slot spikes into cue phase intervals, using time bin resolution specified by tbin_size
    timebins_iti, spikecounts_iti = list(), list()
    for trial in iti_intervals:
        st_time, ed_time = trial
        st_time = ed_time - 2 * one_second
        timebins_iti_trial = np.hstack([np.arange(st_time, ed_time, tbin_size).reshape(-1,1), np.arange(st_time, ed_time, tbin_size).reshape(-1,1) + tbin_size])
        timebins_iti.append(timebins_iti_trial)
        spikecounts_iti.append(np.array(spike_counts_per_observation(timebins_iti_trial, spike_times)))

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_iti = [trial/tbin_size for trial in spikecounts_iti]

    # Filter out trials with nans in cue spike rates
    goals_iti = trial_markers.copy()
    trajectories_iti = trial_trajectories.copy()

    # Package variables for saving
    iti_50ms = {'spikerates_iti': spikerates_iti, 'goals_iti': goals_iti, 'trajectories_iti': trajectories_iti}


    ### Nav end phase, 50ms time bins ###

    # Preallocate list of arrays for spike counts per time bin in cue phase
    # Then slot spikes into cue phase intervals, using time bin resolution specified by tbin_size
    timebins_navend, spikecounts_navend = list(), list()
    for trial in nav_intervals:
        st_time, ed_time = trial
        st_time = ed_time - one_second
        timebins_navend_trial = np.hstack([np.arange(st_time, ed_time, tbin_size).reshape(-1,1), np.arange(st_time, ed_time, tbin_size).reshape(-1,1) + tbin_size])
        timebins_navend.append(timebins_navend_trial)
        spikecounts_navend.append(np.array(spike_counts_per_observation(timebins_navend_trial, spike_times)))

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_navend = [trial/tbin_size for trial in spikecounts_navend]

    # Filter out trials with nans in cue spike rates
    goals_navend = trial_markers.copy()
    trajectories_navend = trial_trajectories.copy()

    # Package variables for saving
    navend_50ms = {'spikerates_navend': spikerates_navend, 'goals_navend': goals_navend, 'trajectories_navend': trajectories_navend}


    ### Nav start phase, 50ms time bins ###

    # Preallocate list of arrays for spike counts per time bin in cue phase
    # Then slot spikes into cue phase intervals, using time bin resolution specified by tbin_size
    timebins_navst, spikecounts_navst = list(), list()
    for trial in nav_intervals:
        st_time, ed_time = trial
        ed_time = st_time + one_second
        timebins_navst_trial = np.hstack([np.arange(st_time, ed_time, tbin_size).reshape(-1,1), np.arange(st_time, ed_time, tbin_size).reshape(-1,1) + tbin_size])
        timebins_navst.append(timebins_navst_trial)
        spikecounts_navst.append(np.array(spike_counts_per_observation(timebins_navst_trial, spike_times)))

    # Convert spike counts to average spike rates across duration of cue phase
    spikerates_navst = [trial/tbin_size for trial in spikecounts_navst]

    # Filter out trials with nans in cue spike rates
    goals_navst = trial_markers.copy()
    trajectories_navst = trial_trajectories.copy()

    # Package variables for saving
    navst_50ms = {'spikerates_navst': spikerates_navst, 'goals_navst': goals_navst, 'trajectories_navst': trajectories_navst}


    ### Save processed data to pkl file ###
    for i, label in enumerate(cell_labels):
        label = label.split('/')
        label[1] = label[1][2:]
        if label[1][0] == '0':
            label[1] = label[1][1:]
        cell_labels[i] = f'{day_dir}ch{label[1]}{label[2]}'

    data = {'cue_50ms': cue_50ms, 'navst_50ms': navst_50ms, 'navend_50ms': navend_50ms, 'iti_50ms': iti_50ms,\
            'cell_labels': cell_labels}
    with open(f'{save_dir}_50ms/{day_dir}_data.pkl', 'wb') as file:
        pickle.dump(data, file)