In [1]:
# Import libraries
import numpy as np
import os , csv
from os import listdir
import matplotlib.pyplot as plt
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.interpolate import interp1d
from sklearn.decomposition import TruncatedSVD

In [90]:
# Session directory holding the per-session .npy / .tsv files loaded below.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data_path= '/media/maria/DATA1/Documents/NeuroMatchAcademy2020_dat/unzipped_files/Richards_2017-10-31.tar'

# Both arrays below are multiplied by 1000 so that they share the same time unit
# (ms, per the original author's note on spike_times).
trials_intervals = np.load(data_path+'/'+'trials.intervals.npy')*1000 # trial [start, end], scaled x1000 like spike_times (ms, not seconds)
spike_times = np.load(data_path+'/'+'spikes.times.npy') * 1000 # Unbinned spike times in ms
# NOTE(review): every other time array in this cell is loaded WITHOUT the x1000 scaling;
# confirm units before comparing them against spike_times / trials_intervals.
trials_gocue_times = np.load(data_path+'/'+'trials.goCue_times.npy') 
trials_response_choice = np.load(data_path+'/'+'trials.response_choice.npy') # -1 left, 1 right, 0 no response
spontaneous_intervals = np.load(data_path+'/'+'spontaneous.intervals.npy')
trials_response_time = np.load(data_path+'/'+'trials.response_times.npy')
spike_clusters = np.load(data_path+'/'+'spikes.clusters.npy') # read as a global by latency_coding
site_positions = np.load(data_path+'/'+'channels.sitePositions.npy')
clusters_depths = np.load(data_path+'/'+'clusters.depths.npy')
clusters_annotation = np.load(data_path+'/'+'clusters._phy_annotation.npy') # read as a global by latency_coding (clusters with value >= 2 are kept)
channel_sites = np.load(data_path+'/'+'channels.site.npy')
channels_brainlocation = pd.read_csv(data_path+'/'+'channels.brainLocation.tsv', sep='\t')
clusters_probes = np.load(data_path+'/'+'clusters.probes.npy')
channels_probe = np.load(data_path+'/'+'channels.probe.npy')
trials_visual_time = np.load(data_path+'/'+'trials.visualStim_times.npy')

# Alias for trials_visual_time (same array object, not a copy).
visual_times = trials_visual_time
# Behaviour data

wheel_movement = np.load(data_path+'/'+'wheelMoves.type.npy')
wheel_intervals = np.load(data_path+'/'+'wheelMoves.intervals.npy')

# Taken from https://github.com/MouseLand/steinmetz2019_NMA/blob/master/steinmetz_loader.py
# To obtain brain regions
def get_good_cells(fdirpath):
    """Return the good-cluster mask and brain-region labels for a session directory.

    Reads three files from ``fdirpath``:

    * ``clusters._phy_annotation.npy`` - clusters whose value is >= 2 count as good.
    * ``clusters.peakChannel.npy`` - cast to int and shifted by -1 before being
      used as a row index into the channel table (presumably 1-based -> 0-based;
      confirm against the dataset documentation).
    * ``channels.brainLocation.tsv`` - the first row is skipped as a header and
      the last column of every remaining row is taken as the region label.

    Returns
    -------
    good_cells : 1-D bool ndarray
        True where the annotation is >= 2 and the shifted peak channel is
        smaller than the number of channel rows.
    brain_region : ndarray
        Region label of each cluster's peak channel (first column of the
        peak-channel array).
    br : ndarray
        Region label of every channel row, in file order.
    """
    annotation = np.load(os.path.join(fdirpath, "clusters._phy_annotation.npy"))
    peak_channel = np.load(os.path.join(fdirpath, "clusters.peakChannel.npy"))
    peak_channel = peak_channel.astype(int) - 1

    # location in brain of each channel: last column of every non-header row
    with open(os.path.join(fdirpath, "channels.brainLocation.tsv"), 'r') as handle:
        rows = csv.reader(handle, delimiter="\t")
        next(rows, None)  # drop the header row
        region_labels = [row[-1] for row in rows]
    br = np.array(region_labels)

    annotated_good = (annotation >= 2).flatten()
    channel_in_table = peak_channel.flatten() < len(br)
    good_cells = np.logical_and(annotated_good, channel_in_table)

    brain_region = br[peak_channel[:, 0]]
    return good_cells, brain_region, br

# First-spike latency of every valid neuron in every trial

def latency_coding(spike_times, trials_intervals, bin_size=100,
                   clusters=None, annotation=None, no_spike_value=100000):
    """Compute, per trial and neuron, the latency of the first spike after trial start.

    Parameters
    ----------
    spike_times : array-like
        Spike timestamps, one per spike (any shape; flattened internally).
        Must be in the same time unit as ``trials_intervals``.
    trials_intervals : array-like, shape (n_trials, 2)
        ``[start, end]`` of every trial. Both bounds are inclusive.
    bin_size : int, optional
        Unused. Kept only so that existing calls keep working.
    clusters : array-like, optional
        Cluster id of every spike, aligned with ``spike_times``. Defaults to
        the notebook-level ``spike_clusters`` array.
    annotation : array-like, optional
        Quality annotation of every cluster, indexed by cluster id. Clusters
        with a value >= 2 are kept. Defaults to the notebook-level
        ``clusters_annotation`` array.
    no_spike_value : number, optional
        Value stored when a neuron fires no spike inside a trial
        (default 100000, the value this function has always used).

    Returns
    -------
    ndarray, shape (n_trials, n_valid_neurons)
        ``first_spike_time - trial_start`` for each trial/neuron pair, or
        ``no_spike_value`` where the neuron did not fire during the trial.
    """
    # Fall back to the arrays loaded at notebook level so that the historical
    # two-argument call keeps working; pass them explicitly to avoid hidden state.
    if clusters is None:
        clusters = spike_clusters
    if annotation is None:
        annotation = clusters_annotation

    # Flatten once: the .npy files may hold column vectors of shape (n, 1).
    spike_times_flat = np.asarray(spike_times).ravel()
    clusters_flat = np.asarray(clusters).ravel()

    # Using the annotation array obtain valid clusters (i.e. >= 2)
    valid_clusters_idx = np.flatnonzero(np.asarray(annotation).ravel() >= 2)

    intervals = np.asarray(trials_intervals)
    starts = intervals[:, 0]
    ends = intervals[:, 1]

    latencies_per_neuron = []
    for cell_idx in valid_clusters_idx:
        # Sort this neuron's spikes once so each trial is a binary search
        # instead of a full scan followed by a sort.
        cell_spikes = np.sort(spike_times_flat[clusters_flat == cell_idx])
        latencies = np.full(starts.shape, no_spike_value, dtype=float)

        if cell_spikes.size:
            # Index of the first spike at or after each trial start.
            first_idx = np.searchsorted(cell_spikes, starts, side='left')
            has_candidate = first_idx < cell_spikes.size
            # Clamp so the fancy index stays in range; clamped entries are
            # discarded through has_candidate below.
            candidate = cell_spikes[np.minimum(first_idx, cell_spikes.size - 1)]
            in_trial = has_candidate & (candidate <= ends)
            latencies[in_trial] = candidate[in_trial] - starts[in_trial]

        latencies_per_neuron.append(latencies)

    # Rows were collected per neuron; transpose to trials x neurons.
    return np.array(latencies_per_neuron).T

In [91]:
# Trials x neurons matrix of first-spike latencies (relative to each trial start);
# entries are 100000 where a neuron fired no spike inside the trial.
first_spike_arr=latency_coding(spike_times,trials_intervals)

In [89]:
# Show the trials x neurons latency matrix (latency_coding already returns an ndarray).
# NOTE(review): the saved output below looks stale — this cell's execution count (89)
# precedes the cell that computes first_spike_arr (91), and it contains -1 entries
# whereas latency_coding now stores 100000 for trials without spikes. Re-run to refresh.
print(np.array(first_spike_arr))

[[ 2.00790519e+03  5.98605188e+02  1.88505188e+02 ...  8.08088380e+01
   4.91086923e+01  3.98419830e+01]
 [ 2.54008887e+02  1.46208887e+02  2.47875554e+02 ...  9.08761843e+01
   1.18242977e+02  2.54492079e+03]
 [ 1.78063012e+03  5.12263455e+02  2.58443012e+03 ...  1.52800370e+01
   4.47515357e+02 -1.00000000e+00]
 ...
 [ 1.03247215e+01 -1.00000000e+00  2.35247215e+01 ...  4.23424853e+01
   9.17566624e+00  1.18442835e+02]
 [ 1.05034242e+02  2.87663424e+03  4.47267576e+02 ...  1.28469367e+02
   5.98023844e+01  1.24257449e+03]
 [ 9.40599956e+01  2.46326662e+02  1.42726662e+02 ...  1.64766393e+01
   5.84434989e+01  2.08977524e+02]]


In [92]:
def logistic_regr_spike_lat(first_spike_arr,trials_response_choice):
    """Fit a logistic-regression choice decoder on first-spike latencies and print its accuracy.

    Parameters
    ----------
    first_spike_arr : array-like
        Feature matrix with one row per trial (passed as ``X``).
    trials_response_choice : array-like
        One label per trial. Flattened to 1-D before use, so both
        ``(n_trials,)`` and ``(n_trials, 1)`` inputs are accepted.

    Prints the accuracy on the training split, then on the held-out split
    (33 % of the trials, fixed ``random_state`` for reproducibility).
    Returns None.
    """
    # Imports stay function-local, as in the original cell, so this function
    # does not depend on any other cell having imported scikit-learn.
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression

    # The labels are loaded as a column vector; scikit-learn expects a 1-D
    # target and otherwise emits a DataConversionWarning (column_or_1d).
    labels = np.ravel(trials_response_choice)

    X_train, X_test, y_train, y_test = train_test_split(first_spike_arr, labels, test_size=0.33, random_state=42)
    clf = LogisticRegression(random_state=0).fit(X_train, y_train)
    print(clf.score(X_train,y_train))
    print(clf.score(X_test,y_test))

In [93]:
# Fit and score the latency-based choice decoder; prints train accuracy, then test accuracy.
# NOTE(review): the saved output shows 1.0 on the training split vs ~0.69 on the test split.
logistic_regr_spike_lat(first_spike_arr,trials_response_choice)

1.0
0.686046511627907


  y = column_or_1d(y, warn=True)


In [8]:
# Sanity check: one [start, end] row per trial.
# NOTE(review): execution count (8) is out of order relative to the cells above.
print(trials_intervals.shape)

(260, 2)


In [9]:
# Sanity check: the labels are stored as a column vector (n_trials, 1), not a 1-D array.
print(trials_response_choice.shape)

(260, 1)
