In [1]:
from nlb_tools.nwb_interface import NWBDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import math
def unit_vector(vector):
    """Scale ``vector`` so that its norm is 1.

    The input is divided by ``np.linalg.norm(vector)``. A zero vector is not
    special-cased, so it produces NaN entries (0/0) instead of raising.
    """
    length = np.linalg.norm(vector)
    return vector / length

def angle_between(v1, v2):
    """Return the angle, in radians, between vectors ``v1`` and ``v2``.

    Both vectors are normalised with ``unit_vector`` and the arccosine of their
    dot product is returned. The dot product is clipped to [-1, 1] first so that
    floating-point round-off cannot push it outside the domain of ``arccos``.

    Examples (result in radians, degrees in parentheses)::

        angle_between((1, 0, 0), (0, 1, 0))   -> 1.5707963267948966  (90 deg)
        angle_between((1, 0, 0), (1, 0, 0))   -> 0.0                 (0 deg)
        angle_between((1, 0, 0), (-1, 0, 0))  -> 3.141592653589793   (180 deg)
    """
    cosine = np.dot(unit_vector(v1), unit_vector(v2))
    return np.arccos(np.clip(cosine, -1.0, 1.0))

#Import standard packages
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import io
from scipy import stats
import pickle

# If you would prefer to load the '.h5' example file rather than the '.pickle' example file. You need the deepdish package
# import deepdish as dd 

#Import function to get the covariate matrix that includes spike history from previous bins
from Neural_Decoding.preprocessing_funcs import get_spikes_with_history

#Import metrics
from Neural_Decoding.metrics import get_R2
from Neural_Decoding.metrics import get_rho

#Import decoder functions
from Neural_Decoding.decoders import WienerCascadeDecoder
from Neural_Decoding.decoders import WienerFilterDecoder
from Neural_Decoding.decoders import DenseNNDecoder
from Neural_Decoding.decoders import SimpleRNNDecoder
from Neural_Decoding.decoders import GRUDecoder
from Neural_Decoding.decoders import LSTMDecoder
from Neural_Decoding.decoders import XGBoostDecoder
from Neural_Decoding.decoders import SVRDecoder

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

def get_sses_pred(y_test,y_test_pred):
    """Sum of squared prediction errors, computed separately for each column.

    Parameters
    ----------
    y_test : array
        Observed values.
    y_test_pred : array
        Predicted values, broadcastable against ``y_test``.

    Returns
    -------
    The sum over axis 0 of ``(y_test_pred - y_test) ** 2``.
    """
    residual = y_test_pred - y_test
    return np.sum(residual ** 2, axis=0)

def get_sses_mean(y_test):
    """Sum of squared deviations from the column mean (total sum of squares).

    The mean is taken over axis 0 of ``y_test`` and the squared deviations are
    summed over the same axis, giving one value per column.
    """
    centered = y_test - np.mean(y_test, axis=0)
    return np.sum(centered ** 2, axis=0)

def nans(shape, dtype=float):
    """Allocate an array of the given ``shape`` and ``dtype`` filled with NaN.

    Used throughout the notebook to pre-allocate result buffers so that any
    slot that is never written stays visibly NaN.
    """
    buffer = np.empty(shape, dtype=dtype)
    buffer.fill(np.nan)
    return buffer

def vector_reject(u,v):
    """Return the part of ``u`` that is left after removing its projection onto ``v``.

    A projection matrix ``P = outer(v, v) / (v . v)`` is built and ``u - P @ u``
    is returned. For 1-D vectors this equals ``u - (u . v / |v|**2) * v``.
    """
    projector = np.outer(v, v.T) / (v @ v.T)
    return u - projector @ u

def calc_proj_matrix(A):
    """Projection matrix onto the column space of ``A``: ``A (A^T A)^-1 A^T``.

    ``A^T A`` is inverted with ``np.linalg.inv``, so the columns of ``A`` must be
    linearly independent; a singular ``A^T A`` raises ``numpy.linalg.LinAlgError``.
    """
    gram = A.T @ A
    return A @ np.linalg.inv(gram) @ A.T
def calc_proj(b, A):
    """Project ``b`` onto the column space of ``A``.

    The projection matrix from ``calc_proj_matrix(A)`` is applied to ``b.T``, so
    for a 2-D ``b`` whose rows are samples the result holds one projected sample
    per column (callers transpose it back).
    """
    projector = calc_proj_matrix(A)
    return projector @ b.T





# Single Lag

In [2]:
# Location and session name of the NWB recording; `filename` and `monkey` are
# reused by later cells (second dataset load, figure file names).
foldername = "~/area2_population_analysis/s1-kinematics/actpas_NWB/"
monkey = "Han_20171207"
filename = foldername + monkey + "_COactpas_TD.nwb"

dataset_5ms = NWBDataset(filename, split_heldout=False)

# Hand "acceleration" is the first difference of hand velocity along time.
# Prepending the first sample keeps the length equal to xy_vel and makes the
# first difference zero. The difference is not divided by the sample interval
# and is taken BEFORE resampling, i.e. at the dataset's native bin width.
xy_vel = dataset_5ms.data['hand_vel'].to_numpy()
xy_acc = np.diff(xy_vel, axis = 0, prepend=[xy_vel[0]])
dataset_5ms.add_continuous_data(xy_acc,'hand_acc',chan_names = ['x','y'])

# Resample (the printed bin_width below confirms 5) and add a smoothed copy of
# the spikes; later cells read it as the 'spikes_smth_40' field.
# NOTE(review): 40 is presumably a Gaussian kernel width in ms - confirm against nlb_tools.
dataset_5ms.resample(5)
dataset_5ms.smooth_spk(40, name='smth_40')
# dataset_5ms.smooth_spk(20, name='smth_20')
bin_width = dataset_5ms.bin_width
print(bin_width)

5


In [3]:
# dataset_5ms.smooth_spk(20, name='smth_20')
# all_data = np.array(dataset_5ms.data.spikes_smth_20)
# print(all_data.shape)
# data_for_pca = all_data[~np.isnan(all_data).any(axis=1)]
# print(data_for_pca.shape)

# scaler = StandardScaler()
# X = scaler.fit_transform(data_for_pca)
# pca = PCA(n_components=n_dims)
# X = pca.fit(X)

# PCA_data = nans([all_data.shape[0],n_dims])
# idx = 0
# for dp in all_data:
#     dp = dp.reshape((1, -1))
#     if np.isnan(dp).any():
#         dp_pca = nans([1,n_dims])
#     else:
#         dp_pca = pca.transform(scaler.transform(dp))
#     PCA_data[idx,:] = dp_pca
#     idx+=1
# print(PCA_data.shape)
# dataset_5ms.add_continuous_data(PCA_data,'PCA_20')
# print('PCA total var explained:',sum(pca.explained_variance_ratio_))

In [4]:
n_dims = 20 # for PCA

# Select trials whose `ctr_hold_bump` flag is set and whose `split` label is not 'none'.
passive_mask = (dataset_5ms.trial_info.ctr_hold_bump) & (dataset_5ms.trial_info.split != 'none')


trial_mask = passive_mask
n_trials = dataset_5ms.trial_info.loc[trial_mask].shape[0]
print(n_trials,'trials')
n_neurons = dataset_5ms.data.spikes.shape[1]
print(n_neurons,'neurons')

all_data = np.array(dataset_5ms.data.spikes_smth_40)
print(all_data.shape)
# Time bins containing any NaN can neither be used for fitting nor be projected.
valid_rows = ~np.isnan(all_data).any(axis=1)
data_for_pca = all_data[valid_rows]
print(data_for_pca.shape)

# Z-score each channel, then fit PCA on the standardised data. `X` keeps the
# standardised matrix (it is not overwritten with the fitted estimator).
scaler = StandardScaler()
X = scaler.fit_transform(data_for_pca)
pca = PCA(n_components=n_dims)
pca.fit(X)

# Project all valid bins in a single vectorised call; bins that contained NaN
# stay NaN in the output so the array remains aligned with the dataset's rows.
PCA_data = nans([all_data.shape[0],n_dims])
PCA_data[valid_rows] = pca.transform(X)
print(PCA_data.shape)
dataset_5ms.add_continuous_data(PCA_data,'PCA')
print('PCA total var explained:',sum(pca.explained_variance_ratio_))

218 trials
153 neurons
(558262, 153)
(558262, 153)
(558262, 20)
PCA total var explained: 0.3873818396730201


In [5]:
def process_train_test(X,y,training_set,test_set):
    """Split trial-structured data by trial, flatten it, and normalise it.

    Parameters
    ----------
    X : ndarray, 3-D
        Predictors indexed as (trial, time bin, feature).
    y : ndarray, 3-D
        Targets indexed as (trial, time bin, output).
    training_set, test_set : index arrays
        Trial indices (first axis of ``X``/``y``) for each partition.

    Returns
    -------
    X_flat_train, X_flat_test : ndarray, 2-D
        Trials stacked along the first axis and z-scored per feature using the
        NaN-aware mean and standard deviation of the training rows only.
    y_train, y_test : ndarray, 2-D
        Trials stacked along the first axis, with the training-set mean of each
        output subtracted from both partitions.
    """
    def _stack_trials(block):
        # (trials, time, channels) -> (trials * time, channels)
        n_tr, n_t, n_ch = block.shape
        return block.reshape(n_tr * n_t, n_ch)

    X_flat_train = _stack_trials(X[training_set, :, :])
    X_flat_test = _stack_trials(X[test_set, :, :])
    y_train = _stack_trials(y[training_set, :, :])
    y_test = _stack_trials(y[test_set, :, :])

    # Normalisation statistics come from the training rows only, so nothing
    # about the test trials leaks into the scaling.
    feat_mean = np.nanmean(X_flat_train, axis=0)
    feat_std = np.nanstd(X_flat_train, axis=0)
    # A constant feature has zero std; divide by 1 instead to avoid 0/0.
    feat_std[feat_std == 0] = 1

    X_flat_train = (X_flat_train - feat_mean) / feat_std
    X_flat_test = (X_flat_test - feat_mean) / feat_std

    target_mean = np.mean(y_train, axis=0)
    return X_flat_train, X_flat_test, y_train - target_mean, y_test - target_mean

In [6]:
def fit_and_predict(dataset, trial_mask, align_field, align_range, lag, x_field, y_field):
    """Decode ``y_field`` from ``x_field`` with ridge regression at one time lag.

    The target is extracted over ``align_range`` (relative to ``align_field``)
    and the predictors over the same window shifted by ``lag``, for the trials
    selected by ``trial_mask``. The target is assumed to have two columns (x, y).

    Two fits are made:

    * a grid-searched ridge on all selected trials, which provides the returned
      coefficients and the in-sample predictions stored under 'pred_vel';
    * a 5-fold cross-validation over trials (shuffled, seed 42), with inputs
      normalised per fold by ``process_train_test``, which provides the R2.

    Returns
    -------
    R2 : float
        Pooled R2 over both outputs from the held-out predictions of all folds.
        It is also printed.
    coef : ndarray
        ``coef_`` of the best estimator fit on all selected trials.
    vel_df : DataFrame
        Trial frame for the target window with 'pred_vel' (x, y) columns appended.
    """
    def _new_search():
        # Ridge with alpha chosen from 1e-4 ... 1e1 (6 log-spaced values).
        return GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)})

    # Target frame at the nominal window, predictor frame at the lag-shifted window.
    vel_df = dataset.make_trial_data(align_field=align_field, align_range=align_range, ignored_trials=~trial_mask)
    shifted_range = (align_range[0] + lag, align_range[1] + lag)
    rates_df = dataset.make_trial_data(align_field=align_field, align_range=shifted_range, ignored_trials=~trial_mask)

    n_trials = rates_df['trial_id'].nunique()
    n_timepoints = int((align_range[1] - align_range[0])/dataset.bin_width)
    n_neurons = rates_df[x_field].shape[1]

    # Fit on every selected trial; these predictions are in-sample.
    lr_all = _new_search()
    rates_array = rates_df[x_field].to_numpy()
    vel_array = vel_df[y_field].to_numpy()
    lr_all.fit(rates_array, vel_array)
    pred_vel = lr_all.predict(rates_array)
    pred_frame = pd.DataFrame(pred_vel, columns=dataset._make_midx('pred_vel', ['x', 'y'], 2))
    vel_df = pd.concat([vel_df, pred_frame], axis=1)

    # Restore the (trial, time, channel) layout so folds can be split by trial.
    rates_by_trial = rates_array.reshape(n_trials, n_timepoints, n_neurons)
    vel_by_trial = vel_array.reshape(n_trials, n_timepoints, 2)

    n_samples = n_trials*n_timepoints
    true_concat = nans([n_samples,2])
    pred_concat = nans([n_samples,2])
    row = 0
    folds = KFold(n_splits=5,shuffle=True,random_state = 42)
    for train_trials, test_trials in folds.split(range(n_trials)):
        X_train, X_test, y_train, y_test = process_train_test(rates_by_trial,vel_by_trial,train_trials,test_trials)
        fold_model = _new_search()
        fold_model.fit(X_train, y_train)
        y_hat = fold_model.predict(X_test)

        # Append this fold's held-out rows to the pooled buffers.
        stop = row + y_hat.shape[0]
        true_concat[row:stop,:] = y_test
        pred_concat[row:stop,:] = y_hat
        row = stop

    sse_model = get_sses_pred(true_concat,pred_concat)
    sse_baseline = get_sses_mean(true_concat)
    R2 = 1-np.sum(sse_model)/np.sum(sse_baseline)
    print('R2:',R2)
    return R2, lr_all.best_estimator_.coef_, vel_df

In [7]:
# def fit_and_predict_weighted(dataset, trial_mask, align_field, align_range, lag, x_field, y_field):
#     """Extracts spiking and kinematic data from selected trials and fits linear decoder"""
#     # Extract rate data from selected trials
#     vel_df = dataset.make_trial_data(align_field=align_field, align_range=align_range, ignored_trials=~trial_mask)
#     # Lag alignment for kinematics and extract kinematics data from selected trials
#     lag_align_range = (align_range[0] + lag, align_range[1] + lag)
#     rates_df = dataset.make_trial_data(align_field=align_field, align_range=lag_align_range, ignored_trials=~trial_mask)
    
#     n_trials = rates_df['trial_id'].nunique()
#     n_timepoints = int((align_range[1] - align_range[0])/dataset.bin_width)
#     n_neurons = rates_df[x_field].shape[1]
    
#     lr_all = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)})
#     rates_array = rates_df[x_field].to_numpy()
#     vel_array = vel_df[y_field].to_numpy()
    
#     vel_array_reshaped = vel_array.reshape(n_trials, n_timepoints, 2)
#     sw = 1/((np.std(vel_array_reshaped[:,:,0],axis = 0) + np.std(vel_array_reshaped[:,:,1],axis = 0))/2)
    
#     lr_all.fit(rates_array, vel_array,sample_weight = np.tile(sw,n_trials))
#     pred_vel = lr_all.predict(rates_array)
#     vel_df = pd.concat([vel_df, pd.DataFrame(pred_vel, columns=dataset._make_midx('pred_vel', ['x', 'y'], 2))], axis=1)
# #     print(lr_all.best_params_['alpha'])
    
#     rates_array = rates_array.reshape(n_trials, n_timepoints, n_neurons)
#     vel_array = vel_array_reshaped
    
#     kf = KFold(n_splits=5,shuffle=True,random_state = 42)   
#     true_concat = nans([n_trials*n_timepoints,2])
#     pred_concat = nans([n_trials*n_timepoints,2])
#     trial_save_idx = 0
#     for training_set, test_set in kf.split(range(0,n_trials)):
#         #split training and testing by trials
#         X_train, X_test, y_train, y_test = process_train_test(rates_array,vel_array,training_set,test_set)
#         lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)}) 
#         lr.fit(X_train, y_train,sample_weight = np.tile(sw,training_set.shape[0]))
#         y_test_predicted = lr.predict(X_test)
        
#         n = y_test_predicted.shape[0]
#         true_concat[trial_save_idx:trial_save_idx+n,:] = y_test
#         pred_concat[trial_save_idx:trial_save_idx+n,:] = y_test_predicted
#         trial_save_idx += n
    
#     sses =get_sses_pred(true_concat,pred_concat)
#     sses_mean=get_sses_mean(true_concat)
#     R2 =1-np.sum(sses)/np.sum(sses_mean)     
#     print('R2:',R2) 
    
#     return R2, lr_all.best_estimator_.coef_, vel_df

In [None]:
# def fit_and_predict_DNN(dataset, trial_mask, align_field, align_range, lag, x_field, y_field):
#     """Extracts spiking and kinematic data from selected trials and fits linear decoder"""
#     # Extract rate data from selected trials
#     vel_df = dataset.make_trial_data(align_field=align_field, align_range=align_range, ignored_trials=~trial_mask)
#     # Lag alignment for kinematics and extract kinematics data from selected trials
#     lag_align_range = (align_range[0] + lag, align_range[1] + lag)
#     rates_df = dataset.make_trial_data(align_field=align_field, align_range=lag_align_range, ignored_trials=~trial_mask)
    
#     n_trials = rates_df['trial_id'].nunique()
#     n_timepoints = int((align_range[1] - align_range[0])/dataset.bin_width)
#     n_neurons = rates_df[x_field].shape[1]
    
#     dnn_all = DenseNNDecoder(units=400,dropout=0.25,num_epochs=10)
#     rates_array = rates_df[x_field].to_numpy()
#     vel_array = vel_df[y_field].to_numpy()
#     dnn_all.fit(rates_array, vel_array)
#     pred_vel = dnn_all.predict(rates_array)
#     vel_df = pd.concat([vel_df, pd.DataFrame(pred_vel, columns=dataset._make_midx('pred_vel', ['x', 'y'], 2))], axis=1)
    
#     rates_array = rates_array.reshape(n_trials, n_timepoints, n_neurons)
#     vel_array = vel_array.reshape(n_trials, n_timepoints, 2)
    
#     kf = KFold(n_splits=5,shuffle=True,random_state = 42)   
#     true_concat = nans([n_trials*n_timepoints,2])
#     pred_concat = nans([n_trials*n_timepoints,2])
#     trial_save_idx = 0
#     for training_set, test_set in kf.split(range(0,n_trials)):
#         #split training and testing by trials
#         X_train, X_test, y_train, y_test = process_train_test(rates_array,vel_array,training_set,test_set)
# #         lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)}) 
# #         lr.fit(X_train, y_train)
#         dnn = DenseNNDecoder(units=400,dropout=0.25,num_epochs=10)
#         dnn.fit(X_train, y_train)
#         y_test_predicted = dnn.predict(X_test)
        
#         n = y_test_predicted.shape[0]
#         true_concat[trial_save_idx:trial_save_idx+n,:] = y_test
#         pred_concat[trial_save_idx:trial_save_idx+n,:] = y_test_predicted
#         trial_save_idx += n
    
#     sses =get_sses_pred(true_concat,pred_concat)
#     sses_mean=get_sses_mean(true_concat)
#     R2 =1-np.sum(sses)/np.sum(sses_mean)     
#     print('R2:',R2) 
#     return R2, vel_df

In [8]:
def sub_and_predict(dataset, trial_mask, align_field, align_range, lag, x_field, y_field, weights):
    """Ridge-decode ``y_field`` after removing given directions from the predictors.

    Same procedure as ``fit_and_predict``, except that before any fitting the
    predictors are replaced by their residual after projection onto the space
    spanned by the rows of ``weights`` (via ``calc_proj(..., weights.T)``).

    Parameters
    ----------
    weights : ndarray, 2-D
        One row per direction to remove; each row has one entry per predictor
        channel (e.g. stacked decoder coefficient rows).
    Other parameters are as in ``fit_and_predict``.

    Returns
    -------
    R2 : float
        Pooled R2 from 5-fold cross-validation over trials (also printed).
    coef : ndarray
        ``coef_`` of the best estimator fit on all selected trials.
    vel_df : DataFrame
        Trial frame for the target window with 'pred_vel' (x, y) columns holding
        the in-sample predictions.
    """
    def _new_search():
        # Ridge with alpha chosen from 1e-4 ... 1e1 (6 log-spaced values).
        return GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)})

    # Target frame at the nominal window, predictor frame at the lag-shifted window.
    vel_df = dataset.make_trial_data(align_field=align_field, align_range=align_range, ignored_trials=~trial_mask)
    shifted_range = (align_range[0] + lag, align_range[1] + lag)
    rates_df = dataset.make_trial_data(align_field=align_field, align_range=shifted_range, ignored_trials=~trial_mask)

    n_trials = rates_df['trial_id'].nunique()
    n_timepoints = int((align_range[1] - align_range[0])/dataset.bin_width)
    n_neurons = rates_df[x_field].shape[1]

    # Subtract from every sample its component inside the span of `weights`.
    raw_rates = rates_df[x_field].to_numpy()
    rates_array = raw_rates - calc_proj(raw_rates,weights.T).T
    vel_array = vel_df[y_field].to_numpy()

    # Fit on every selected trial; these predictions are in-sample.
    lr_all = _new_search()
    lr_all.fit(rates_array, vel_array)
    pred_vel = lr_all.predict(rates_array)
    pred_frame = pd.DataFrame(pred_vel, columns=dataset._make_midx('pred_vel', ['x', 'y'], 2))
    vel_df = pd.concat([vel_df, pred_frame], axis=1)

    # Restore the (trial, time, channel) layout so folds can be split by trial.
    rates_by_trial = rates_array.reshape(n_trials, n_timepoints, n_neurons)
    vel_by_trial = vel_array.reshape(n_trials, n_timepoints, 2)

    n_samples = n_trials*n_timepoints
    true_concat = nans([n_samples,2])
    pred_concat = nans([n_samples,2])
    row = 0
    folds = KFold(n_splits=5,shuffle=True,random_state = 42)
    for train_trials, test_trials in folds.split(range(n_trials)):
        X_train, X_test, y_train, y_test = process_train_test(rates_by_trial,vel_by_trial,train_trials,test_trials)
        fold_model = _new_search()
        fold_model.fit(X_train, y_train)
        y_hat = fold_model.predict(X_test)

        # Append this fold's held-out rows to the pooled buffers.
        stop = row + y_hat.shape[0]
        true_concat[row:stop,:] = y_test
        pred_concat[row:stop,:] = y_hat
        row = stop

    sse_model = get_sses_pred(true_concat,pred_concat)
    sse_baseline = get_sses_mean(true_concat)
    R2 = 1-np.sum(sse_model)/np.sum(sse_baseline)
    print('R2:',R2)
    return R2, lr_all.best_estimator_.coef_, vel_df

## with Neurons

In [9]:
# Settings for the single-lag sweep on smoothed single-neuron activity.
# Lags tested: -300 ... 280 in steps of 20 (np.arange excludes the stop value).
lag_axis = np.arange(-300,300,20)
x_field = 'spikes_smth_40'
y_field ='hand_acc'
trial_mask = passive_mask

# Prepare for plotting
plot_dir = [0.0, 90.0, 180.0, 270.0] # limit plot directions to reduce cluttering
plot_dim = 'x' # plot the x component of y_field (hand acceleration here, not velocity)
colors = ['red', 'blue', 'green', 'orange']

# NOTE(review): absolute, user-specific output directory; savefig in the sweep
# cell writes here, so it has to exist on the machine running the notebook.
figDir = "/Users/sherryan/area2_population_analysis/figures/neurons/pas/"
# Number of predictor channels; sizes the coefficient buffers in the sweep cell.
dim = n_neurons

In [10]:
# Behaviour windows to decode and the matching tags used in figure file names.
ranges = [(0,120),(-100,120)]
labels = ['_early_acc_','_long_acc_']

for pred_range, label in zip(ranges, labels):
    # Time axis of the decoded behaviour window, one point per bin.
    x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)
    fig_prefix = figDir + monkey + label

    # Stage 0: sweep the neural lag using the full predictor space.
    curr_r2_array = nans([len(lag_axis)])
    curr_coef_array = nans([len(lag_axis),2,dim])
    for i, lag in enumerate(lag_axis):
        r2, coef, _ = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag, x_field, y_field)
        curr_r2_array[i] = r2
        curr_coef_array[i,:,:] = coef

    idx_max = np.argmax(curr_r2_array)
    time_max = lag_axis[idx_max]
    # Refit at the best lag only to obtain the trial frame that carries 'pred_vel'.
    _, _, vel_df = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field)

    # Measured traces, one line per trial, coloured by direction.
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial[y_field][plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel(plot_dim + '_' + y_field)
    plt.tight_layout()
    plt.savefig(fig_prefix + 'true.png', dpi = 'figure')
    plt.close()

    # In-sample predictions at the best lag.
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel(plot_dim + '_' + y_field)
    plt.tight_layout()
    plt.savefig(fig_prefix + str(0) +'_pred.png', dpi = 'figure')
    plt.close()

    # Cross-validated R2 as a function of lag, with the best lag marked.
    plt.plot(lag_axis, curr_r2_array)
    plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
    plt.legend()
    plt.title('R2 score predicting ' + y_field + ' ' + str(pred_range))
    plt.xlabel('Time lag (ms)')
    plt.ylabel('R2')
    plt.tight_layout()
    plt.savefig(fig_prefix + str(0) +'.png', dpi = 'figure')
    plt.close()

    # Stages 1-3: project out the decoding dimensions found so far, repeat the
    # sweep, and add the new best-lag dimensions to the set to remove.
    # The counter is called `round_no` rather than `iter` so that the built-in
    # iter() is not overwritten at notebook scope.
    weights = curr_coef_array[idx_max,:,:]
    for round_no in range(1, 4):
        sub_coef_array = nans([len(lag_axis),2,dim])
        sub_r2_array = nans([len(lag_axis)])

        for i, lag in enumerate(lag_axis):
            r2, coef,_ = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag,x_field,y_field,weights)
            sub_r2_array[i] = r2
            sub_coef_array[i,:,:] = coef

        plt.plot(lag_axis,sub_r2_array)
        plt.title('R2 score projecting out #'+ str(round_no) +' t_max dim')
        idx_max = np.argmax(sub_r2_array)
        time_max = lag_axis[idx_max]
        plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
        plt.legend()
        plt.xlabel('Time lag (ms)')
        plt.ylabel('R2')
        plt.tight_layout()
        plt.savefig(fig_prefix + str(round_no) +'.png', dpi = 'figure')
        plt.close()

        # Predictions at this round's best lag (still using the current `weights`).
        _, _, vel_df = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field,weights)
        for trial_dir, color in zip(plot_dir, colors):
            cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
            for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
                plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
        plt.xlabel('Time (ms)')
        plt.ylabel(plot_dim + '_' + y_field)
        plt.tight_layout()
        plt.savefig(fig_prefix + str(round_no) +'_pred.png', dpi = 'figure')
        plt.close()

        #stack the decoding dimensions to be projected out
        weights = np.vstack((weights,sub_coef_array[idx_max,:,:]))

R2: -0.8898638295068193
R2: -0.8522369802251266
R2: -0.8115975471472179
R2: -0.7554997755283628
R2: -0.7434737854485292
R2: -0.8007964321957166
R2: -0.8487756045048165
R2: -0.8657643564906503
R2: -0.7593342565978145
R2: -0.5684445507811888
R2: -0.4060243678998967
R2: -0.2863172540047627
R2: -0.15810479577832992
R2: 0.033515227502269984
R2: 0.29157565582409484
R2: 0.5066640559547004
R2: 0.6477902366082235
R2: 0.7305828697513406
R2: 0.7583910064013076
R2: 0.7467042880296995
R2: 0.7199564239940925
R2: 0.7054028626006914
R2: 0.6795660585386087
R2: 0.6564142441251788
R2: 0.6455568915750696
R2: 0.6538849706436916
R2: 0.6662209084230863
R2: 0.6686396634423555
R2: 0.657763232808591
R2: 0.6361977508709402
R2: 0.7583910064013076
R2: -0.9448132497141755
R2: -0.8619628464745701
R2: -0.7970334097899336
R2: -0.7396899696486097
R2: -0.7270371935971573
R2: -0.7730857557574966
R2: -0.8191079451883296
R2: -0.8351801823971765
R2: -0.7448177505344904
R2: -0.5621837290654956
R2: -0.4060072491913673
R2: -0.

KeyboardInterrupt: 

## with PCA

In [11]:
# Settings for the single-lag sweep on the PCA-reduced activity.
# `trial_mask` is not set here; the sweep cell uses the value left by an earlier cell.
x_field = 'PCA'
y_field ='hand_acc'
# Lags tested: -300 ... 280 in steps of 20 (np.arange excludes the stop value).
lag_axis = np.arange(-300,300,20)

# Prepare for plotting
plot_dir = [0.0, 90.0, 180.0, 270.0] # limit plot directions to reduce cluttering
plot_dim = 'x' # plot the x component of y_field (hand acceleration here, not velocity)
colors = ['red', 'blue', 'green', 'orange']


# NOTE(review): absolute, user-specific output directory; savefig in the sweep
# cell writes here, so it has to exist on the machine running the notebook.
figDir = "/Users/sherryan/area2_population_analysis/figures/PCA/pas/"
# Number of predictor channels (PCA components); sizes the coefficient buffers.
dim = n_dims

In [12]:
# plot_range = (-100, 120)
# vel_df = dataset_5ms.make_trial_data(align_field='move_onset_time', align_range=plot_range, ignored_trials=~passive_mask)

# x_axis = np.arange(plot_range[0], plot_range[1], dataset_5ms.bin_width)
# plot_dims = 10
# fig,ax=plt.subplots(plot_dims,1,figsize=(10,20))

# for i in range(plot_dims):
#     for _, trial in vel_df.groupby('trial_id'):
#         ax[i].plot(x_axis,trial.PCA.to_numpy()[:,i], color = 'k',linewidth = 0.5)
#         ax[i].axvline(0,color ='k',ls = '--')
#         if i<plot_dims-1:
#             ax[i].set_xticks([])
#         else:
#             ax[i].set_xlabel('Time (ms)')
#         ax[i].set_ylabel('Dim. '+str(i+1))

In [13]:
# Behaviour windows to decode and the matching tags used in figure file names.
ranges = [(0,120),(-100,120)]
labels = ['_early_acc_','_long_acc_']

for pred_range, label in zip(ranges, labels):
    # Time axis of the decoded behaviour window, one point per bin.
    x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)
    fig_prefix = figDir + monkey + label

    # Stage 0: sweep the neural lag using the full predictor space.
    curr_r2_array = nans([len(lag_axis)])
    curr_coef_array = nans([len(lag_axis),2,dim])
    for i, lag in enumerate(lag_axis):
        r2, coef, _ = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag, x_field, y_field)
        curr_r2_array[i] = r2
        curr_coef_array[i,:,:] = coef

    idx_max = np.argmax(curr_r2_array)
    time_max = lag_axis[idx_max]
    # Refit at the best lag only to obtain the trial frame that carries 'pred_vel'.
    _, _, vel_df = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field)

    # Measured traces, one line per trial, coloured by direction.
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial[y_field][plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel(plot_dim + '_' + y_field)
    plt.tight_layout()
    plt.savefig(fig_prefix + 'true.png', dpi = 'figure')
    plt.close()

    # In-sample predictions at the best lag.
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel(plot_dim + '_' + y_field)
    plt.tight_layout()
    plt.savefig(fig_prefix + str(0) +'_pred.png', dpi = 'figure')
    plt.close()

    # Cross-validated R2 as a function of lag, with the best lag marked.
    plt.plot(lag_axis, curr_r2_array)
    plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
    plt.legend()
    plt.title('R2 score predicting ' + y_field + ' ' + str(pred_range))
    plt.xlabel('Time lag (ms)')
    plt.ylabel('R2')
    plt.tight_layout()
    plt.savefig(fig_prefix + str(0) +'.png', dpi = 'figure')
    plt.close()

    # Stages 1-3: project out the decoding dimensions found so far, repeat the
    # sweep, and add the new best-lag dimensions to the set to remove.
    # The counter is called `round_no` rather than `iter` so that the built-in
    # iter() is not overwritten at notebook scope.
    weights = curr_coef_array[idx_max,:,:]
    for round_no in range(1, 4):
        sub_coef_array = nans([len(lag_axis),2,dim])
        sub_r2_array = nans([len(lag_axis)])

        for i, lag in enumerate(lag_axis):
            r2, coef,_ = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag,x_field,y_field,weights)
            sub_r2_array[i] = r2
            sub_coef_array[i,:,:] = coef

        plt.plot(lag_axis,sub_r2_array)
        plt.title('R2 score projecting out #'+ str(round_no) +' t_max dim')
        idx_max = np.argmax(sub_r2_array)
        time_max = lag_axis[idx_max]
        plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
        plt.legend()
        plt.xlabel('Time lag (ms)')
        plt.ylabel('R2')
        plt.tight_layout()
        plt.savefig(fig_prefix + str(round_no) +'.png', dpi = 'figure')
        plt.close()

        # Predictions at this round's best lag (still using the current `weights`).
        _, _, vel_df = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field,weights)
        for trial_dir, color in zip(plot_dir, colors):
            cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
            for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
                plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
        plt.xlabel('Time (ms)')
        plt.ylabel(plot_dim + '_' + y_field)
        plt.tight_layout()
        plt.savefig(fig_prefix + str(round_no) +'_pred.png', dpi = 'figure')
        plt.close()

        #stack the decoding dimensions to be projected out
        weights = np.vstack((weights,sub_coef_array[idx_max,:,:]))

R2: -0.08185084354804384
R2: -0.08503156752719776
R2: -0.099260865743386
R2: -0.12084635139438826
R2: -0.14382787417843979
R2: -0.15946896021294288
R2: -0.16126559041643618
R2: -0.15096345037490355
R2: -0.13301125416968285
R2: -0.09687321215897726
R2: -0.014779485043502794
R2: 0.09435603376549107
R2: 0.21251853814519528
R2: 0.34892591393214245
R2: 0.5010501081903735
R2: 0.6389287255461553
R2: 0.7322283702930646
R2: 0.779981000658927
R2: 0.7963802455990409
R2: 0.7918502448396385
R2: 0.7736296944859244
R2: 0.7499037208940389
R2: 0.728608537302551
R2: 0.7135885381151439
R2: 0.7056044782599517
R2: 0.7024800859209658
R2: 0.6992154087210349
R2: 0.6939409233683762
R2: 0.6876517972457852
R2: 0.6776900518848417
R2: 0.7963802455990409
R2: -0.07522485916783506
R2: -0.07428959317880635
R2: -0.08273633159362936
R2: -0.10245193397270858
R2: -0.1286173890851745
R2: -0.1492752517310909
R2: -0.15530121197682223
R2: -0.14631835712185404
R2: -0.12644146442369397
R2: -0.09227227599987664
R2: -0.0276640513

# Multi Lags

In [15]:
# Second copy of the same recording (`filename` comes from the first loading
# cell), resampled more coarsely for the multi-lag analysis.
dataset_50ms = NWBDataset(filename, split_heldout=False)
# First difference of hand velocity along time, taken BEFORE resampling.
# Prepending the first sample keeps the length unchanged and makes the first
# difference zero. This overwrites xy_vel / xy_acc from the earlier cell.
xy_vel = dataset_50ms.data['hand_vel'].to_numpy()
xy_acc = np.diff(xy_vel, axis = 0, prepend=[xy_vel[0]])
dataset_50ms.add_continuous_data(xy_acc,'hand_acc',chan_names = ['x','y'])

# The printed bin_width below confirms the new bin size (50).
dataset_50ms.resample(50)
print(dataset_50ms.bin_width)

50


In [16]:
n_dims = 20 # for PCA

# Select trials whose `ctr_hold_bump` flag is set and whose `split` label is not 'none'.
passive_mask = (dataset_50ms.trial_info.ctr_hold_bump) & (dataset_50ms.trial_info.split != 'none')

trial_mask = passive_mask
n_trials = dataset_50ms.trial_info.loc[trial_mask].shape[0]
print(n_trials,'trials')
n_neurons = dataset_50ms.data.spikes.shape[1]
print(n_neurons,'neurons')

# Unsmoothed spikes are used here (the 5 ms analysis used the smoothed field).
all_data = np.array(dataset_50ms.data.spikes)
print(all_data.shape)
# Time bins containing any NaN can neither be used for fitting nor be projected.
valid_rows = ~np.isnan(all_data).any(axis=1)
data_for_pca = all_data[valid_rows]
print(data_for_pca.shape)

# Z-score each channel, then fit PCA on the standardised data. `X` keeps the
# standardised matrix (it is not overwritten with the fitted estimator).
scaler = StandardScaler()
X = scaler.fit_transform(data_for_pca)
pca = PCA(n_components=n_dims)
pca.fit(X)

# Project all valid bins in a single vectorised call; bins that contained NaN
# stay NaN in the output so the array remains aligned with the dataset's rows.
PCA_data = nans([all_data.shape[0],n_dims])
PCA_data[valid_rows] = pca.transform(X)
print(PCA_data.shape)
dataset_50ms.add_continuous_data(PCA_data,'PCA')
print('PCA total var explained:',sum(pca.explained_variance_ratio_))

218 trials
153 neurons
(55827, 153)
(55827, 153)
(55827, 20)
PCA total var explained: 0.29862936918522437


In [17]:
# Cut every selected trial to the same window around 'move_onset_time'.
passive_data = dataset_50ms.make_trial_data(
    align_field='move_onset_time',
    align_range=(-400, 700),
    ignored_trials=~trial_mask,
)
trial_groups = passive_data.groupby('trial_id')

# The row count of the first trial sets the number of time bins per trial.
for idx, trial in trial_groups:
    n_timepoints = trial.shape[0]
    break
print(n_timepoints,'time bins')

# One (trial, time bin, channel) buffer per signal, pre-filled with NaN.
passive_trials_neuron = nans([n_trials,n_timepoints,n_neurons])
passive_trials_vel = nans([n_trials,n_timepoints,2])
passive_trials_acc = nans([n_trials,n_timepoints,2])
passive_trials_pca = nans([n_trials,n_timepoints,n_dims])

# Copy each trial into row i of every buffer, in groupby order.
for i, (idx, trial) in enumerate(trial_groups):
    passive_trials_neuron[i,:,:] = trial.spikes.to_numpy()
    passive_trials_vel[i,:,:] = trial.hand_vel.to_numpy()
    passive_trials_acc[i,:,:] = trial.hand_acc.to_numpy()
    passive_trials_pca[i,:,:] = trial.PCA.to_numpy()

print(passive_trials_neuron.shape)
print(passive_trials_vel.shape)
print(passive_trials_acc.shape)
print(passive_trials_pca.shape)


22 time bins
(218, 22, 153)
(218, 22, 2)
(218, 22, 2)
(218, 22, 20)


## with Neurons

In [18]:
# Settings for the multi-lag decoding sweep on single-neuron activity.
# data_range must match the align_range used when `passive_data` was cut
# (-400, 700); the sweep cell converts prediction windows to bin indices with it.
data_range = [-400,700]
# NOTE(review): absolute, user-specific output directory; it has to exist on
# the machine running the notebook.
figDir = "/Users/sherryan/area2_population_analysis/figures/neurons/pas/"
# Predictors and targets as (trial, time bin, channel) arrays.
passive_x = passive_trials_neuron
passive_y = passive_trials_acc
y_type = 'acceleration'

# Behaviour windows to decode and the matching tags used in figure file names.
ranges = [(0,120),(-100,120)]
labels = ['early_acc','long_acc']

# Number of predictor channels per time bin.
dim = n_neurons

In [20]:
for pred_range, label in zip(ranges, labels):

    # Bin indices of the prediction window inside the extracted trial window.
    idx1 = int((pred_range[0] - data_range[0])/dataset_50ms.bin_width)
    idx2 = int(n_timepoints - (data_range[1]-pred_range[1])/dataset_50ms.bin_width)

    # Amount of neural data used before / after each predicted bin.
    t_before_range = range(0,301,50)
    t_after_range = range(0,501,50)

    # multi_R2s[j, k]: pooled cross-validated R2 for the j-th "before" and k-th "after" span.
    multi_R2s = nans([len(t_before_range),len(t_after_range)])
    multi_coefs = []
    for j, time_before in enumerate(t_before_range):
        coef_arr = []
        for k, time_after in enumerate(t_after_range):
            print('Predicting with',-time_before, 'to', time_after,'ms neural data')

            bins_before= int(time_before/dataset_50ms.bin_width) #How many bins of neural data prior to the output are used for decoding
            bins_current= 1 #Whether to use concurrent time bin of neural data
            bins_after= int(time_after/dataset_50ms.bin_width) #How many bins of neural data after the output are used for decoding

            n_total_bins = bins_before + bins_current + bins_after

            # Per trial: build the lagged covariates, keep only the prediction
            # window, and flatten (lag bins x channels) into one feature axis.
            X =  nans([n_trials,idx2-idx1,n_total_bins*dim])
            for i, trial_data in enumerate(passive_x):
                trial_hist=get_spikes_with_history(trial_data,bins_before,bins_after,bins_current)
                trial_hist = trial_hist[idx1:idx2,:,:]
                trial_hist_flat=trial_hist.reshape(trial_hist.shape[0],(trial_hist.shape[1]*trial_hist.shape[2]))
                X[i,:,:] = trial_hist_flat
            y = passive_y[:,idx1:idx2,:]

            # Fit on all trials (un-normalised inputs); only its coefficients
            # and chosen alpha are kept.
            lr_all = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
            X_reshaped = X.reshape((X.shape[0]*X.shape[1]),X.shape[2])
            y_reshaped = y.reshape((y.shape[0]*y.shape[1]),y.shape[2])
            lr_all.fit(X_reshaped, y_reshaped)
            print(lr_all.best_params_['alpha'])

            # 5-fold cross-validation over trials; held-out rows are pooled
            # across folds before R2 is computed.
            kf = KFold(n_splits=5,shuffle=True,random_state = 42)
            true_concat = nans([(n_trials*(idx2-idx1)),2])
            pred_concat = nans([(n_trials*(idx2-idx1)),2])
            trial_save_idx = 0
            for training_set, test_set in kf.split(range(0,n_trials)):
                #split training and testing by trials
                X_train, X_test, y_train, y_test = process_train_test(X,y,training_set,test_set)
                lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
                lr.fit(X_train, y_train)
                y_test_predicted = lr.predict(X_test)
                n = y_test_predicted.shape[0]
                true_concat[trial_save_idx:trial_save_idx+n,:] = y_test
                pred_concat[trial_save_idx:trial_save_idx+n,:] = y_test_predicted
                trial_save_idx += n

            sses =get_sses_pred(true_concat,pred_concat)
            sses_mean=get_sses_mean(true_concat)
            multi_R2s[j,k] =1-np.sum(sses)/np.sum(sses_mean)
            print('R2:',multi_R2s[j,k])
            coef_arr.append(lr_all.best_estimator_.coef_)
        multi_coefs.append(coef_arr)

    # Heat map of R2: rows follow t_before_range, columns follow t_after_range.
    fig, ax = plt.subplots()
    im = ax.imshow(multi_R2s)
    ax.set_xlabel('Length of lagging info')
    ax.set_ylabel('Length of leading info')

    ax.set_xticks(np.arange(len(t_after_range)))
    ax.set_yticks(np.arange(len(t_before_range)))
    ax.set_xticklabels(labels=t_after_range)
    ax.set_yticklabels(labels=t_before_range)

    ax.set_title('R2 predicting ' + str(pred_range) + ' ' + y_type +'\nwith different lagging/leading info')
    fig.tight_layout()

    # Annotate every cell with its R2 truncated (not rounded) to 3 decimals.
    for row in range(len(t_before_range)):
        for col in range(len(t_after_range)):
            ax.text(col, row, str(int(multi_R2s[row, col]*1000)/1000),
                    ha="center", va="center", color="w")

    plt.tight_layout()
    plt.savefig(figDir + monkey + '_multi_' + label + '.png', dpi = 'figure')
    plt.close()

Predicting with 0 to 0 ms neural data
1000.0
R2: 0.805239900695789
Predicting with 0 to 50 ms neural data
1000.0
R2: 0.868496336807755
Predicting with 0 to 100 ms neural data
1000.0
R2: 0.8926474318137211
Predicting with 0 to 150 ms neural data
1000.0
R2: 0.9030208987855227
Predicting with 0 to 200 ms neural data
1000.0
R2: 0.9052965666073302
Predicting with 0 to 250 ms neural data
1000.0
R2: 0.9118100753678338
Predicting with 0 to 300 ms neural data
1000.0
R2: 0.9164170928322575
Predicting with 0 to 350 ms neural data
1000.0
R2: 0.9176492448579978
Predicting with 0 to 400 ms neural data
1000.0
R2: 0.9187437646802838
Predicting with 0 to 450 ms neural data
1000.0
R2: 0.9180105858015866
Predicting with 0 to 500 ms neural data
1000.0
R2: 0.9164616042911174
Predicting with -50 to 0 ms neural data
1000.0
R2: 0.7795034028320339
Predicting with -50 to 50 ms neural data
1000.0
R2: 0.871847081813525
Predicting with -50 to 100 ms neural data
1000.0
R2: 0.8915432679729756
Predicting with -50 to 

  plt.tight_layout()


Predicting with 0 to 0 ms neural data
1000.0
R2: 0.7606016722516974
Predicting with 0 to 50 ms neural data
1000.0
R2: 0.8366244148232961
Predicting with 0 to 100 ms neural data
1000.0
R2: 0.8547987086625825
Predicting with 0 to 150 ms neural data
1000.0
R2: 0.8650211708095374
Predicting with 0 to 200 ms neural data
1000.0
R2: 0.8682003247484064
Predicting with 0 to 250 ms neural data
1000.0
R2: 0.8715165115673208
Predicting with 0 to 300 ms neural data
1000.0
R2: 0.8741721034121513
Predicting with 0 to 350 ms neural data
1000.0
R2: 0.8708049144830092
Predicting with 0 to 400 ms neural data
1000.0
R2: 0.8688116949017466
Predicting with 0 to 450 ms neural data
1000.0
R2: 0.8689309882741179
Predicting with 0 to 500 ms neural data
1000.0
R2: 0.8669939847624489
Predicting with -50 to 0 ms neural data
1000.0
R2: 0.7658868226088662
Predicting with -50 to 50 ms neural data
1000.0
R2: 0.8403682954594863
Predicting with -50 to 100 ms neural data
1000.0
R2: 0.8570062014633169
Predicting with -50 

  plt.tight_layout()


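## With PCA

The same lag/lead decoding sweep as above, now using the PCA-projected neural data (`passive_trials_pca`, `n_dims` features per bin) as decoder input.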

In [21]:
# Settings for the PCA-based run of the lag/lead decoding sweep.
data_range = [-400, 700]
# NOTE(review): absolute, user-specific output directory.
figDir = "/Users/sherryan/area2_population_analysis/figures/PCA/pas/"

# Decoder inputs and targets read by the sweep cell.
passive_x, passive_y = passive_trials_pca, passive_trials_acc
y_type = 'acceleration'

# File-name label -> prediction window, split back into the two parallel
# lists that the sweep cell zips over.
pred_windows = {'early_acc': (0, 120), 'long_acc': (-100, 120)}
labels = list(pred_windows.keys())
ranges = list(pred_windows.values())

# Number of features per time bin.
dim = n_dims

In [24]:
# Sweep over how much neural data before and after each output bin is given to
# a ridge decoder, once per prediction window in `ranges`, and save one R2
# heat map per window.
#
# Reads notebook globals set in earlier cells: ranges, labels, data_range,
# dataset_50ms, n_timepoints, n_trials, dim, passive_x, passive_y, y_type,
# figDir, monkey, and the helpers nans, process_train_test, get_sses_pred,
# get_sses_mean and KFold.

# Loop-invariant settings: the ridge penalties tried by every grid search and
# the trial-wise splitter (fixed seed, so every lag/lead pair sees the same folds).
alpha_grid = {'alpha': np.logspace(-4, 4, 9)}
kf = KFold(n_splits=5,shuffle=True,random_state = 42)

for pred_range, label in zip(ranges, labels):

    # Window edges are divided by the bin width to get bin indices within a trial.
    idx1 = int((pred_range[0] - data_range[0])/dataset_50ms.bin_width)
    idx2 = int(n_timepoints - (data_range[1]-pred_range[1])/dataset_50ms.bin_width)

    t_before_range = range(0,301,50)
    t_after_range = range(0,501,50)

    # The targets depend only on the prediction window, so slice them once here
    # instead of once per lag/lead pair.
    y = passive_y[:,idx1:idx2,:]
    y_reshaped = y.reshape((y.shape[0]*y.shape[1]),y.shape[2])
    n_outputs = y.shape[2]  # width of the pooled true/predicted buffers below

    multi_R2s = nans([len(t_before_range),len(t_after_range)])
    multi_coefs = []
    # j indexes rows (time_before), k indexes columns (time_after) of multi_R2s.
    for j, time_before in enumerate(t_before_range):
        coef_arr = []
        for k, time_after in enumerate(t_after_range):
            print('Predicting with',-time_before, 'to', time_after,'ms neural data')

            bins_before= int(time_before/dataset_50ms.bin_width) #How many bins of neural data prior to the output are used for decoding
            bins_current= 1 #Whether to use concurrent time bin of neural data
            bins_after= int(time_after/dataset_50ms.bin_width) #How many bins of neural data after the output are used for decoding

            n_total_bins = bins_before + bins_current + bins_after

            # Build the per-trial design matrix: one row per output bin, with
            # the surrounding bins' features flattened side by side.
            X =  nans([n_trials,idx2-idx1,n_total_bins*dim])
            for i, trial_data in enumerate(passive_x):
                trial_hist=get_spikes_with_history(trial_data,bins_before,bins_after,bins_current)
                trial_hist = trial_hist[idx1:idx2,:,:]
                # Merge the last two axes so each row is a single feature vector.
                trial_hist_flat=trial_hist.reshape(trial_hist.shape[0],(trial_hist.shape[1]*trial_hist.shape[2]))
                X[i,:,:] = trial_hist_flat

            # Fit on all trials pooled together; this model is only used for the
            # reported alpha and the stored coefficients, not for the R2 below.
            lr_all = GridSearchCV(Ridge(), alpha_grid)
            X_reshaped = X.reshape((X.shape[0]*X.shape[1]),X.shape[2])
            lr_all.fit(X_reshaped, y_reshaped)
            print(lr_all.best_params_['alpha'])

            # Held-out predictions from every fold are written back to back
            # into these buffers so R2 is computed once over all trials.
            true_concat = nans([(n_trials*(idx2-idx1)),n_outputs])
            pred_concat = nans([(n_trials*(idx2-idx1)),n_outputs])
            trial_save_idx = 0
            for training_set, test_set in kf.split(range(0,n_trials)):
                #split training and testing by trials
                X_train, X_test, y_train, y_test = process_train_test(X,y,training_set,test_set)
                # alpha is re-selected inside each training fold
                lr = GridSearchCV(Ridge(), alpha_grid)
                lr.fit(X_train, y_train)
                y_test_predicted = lr.predict(X_test)
                n = y_test_predicted.shape[0]
                true_concat[trial_save_idx:trial_save_idx+n,:] = y_test
                pred_concat[trial_save_idx:trial_save_idx+n,:] = y_test_predicted
                trial_save_idx += n

            # Single R2 from the summed prediction error over the summed
            # error-about-the-mean, as returned by the two helpers.
            sses =get_sses_pred(true_concat,pred_concat)
            sses_mean=get_sses_mean(true_concat)
            multi_R2s[j,k] =1-np.sum(sses)/np.sum(sses_mean)
            print('R2:',multi_R2s[j,k])
            coef_arr.append(lr_all.best_estimator_.coef_)
        multi_coefs.append(coef_arr)

    # Heat map: rows follow t_before_range, columns follow t_after_range.
    fig, ax = plt.subplots()
    im = ax.imshow(multi_R2s)
    ax.set_xlabel('Length of lagging info')
    ax.set_ylabel('Length of leading info')

    ax.set_xticks(np.arange(len(t_after_range)))
    ax.set_yticks(np.arange(len(t_before_range)))
    ax.set_xticklabels(labels=t_after_range)
    ax.set_yticklabels(labels=t_before_range)

    ax.set_title('R2 predicting ' + str(pred_range) + ' ' + y_type +'\nwith different lagging/leading info')

    # Write each R2 (truncated to three decimals) into its cell. Dedicated
    # loop names keep the sweep counters j/k and the trial counter i intact.
    for row in range(len(t_before_range)):
        for col in range(len(t_after_range)):
            ax.text(col, row, str(int(multi_R2s[row, col]*1000)/1000),
                    ha="center", va="center", color="w")

    # Lay the figure out once, after all artists exist; calling tight_layout
    # a second time on the same figure is redundant.
    fig.tight_layout()
    fig.savefig(figDir + monkey + '_multi_' + label + '.png', dpi = 'figure')
    plt.close(fig)  # close this specific figure so figures do not pile up across windows

Predicting with 0 to 0 ms neural data
1000.0
R2: 0.8203380562513611
Predicting with 0 to 50 ms neural data
1000.0
R2: 0.8858475964674932
Predicting with 0 to 100 ms neural data
1000.0
R2: 0.8996250362901388
Predicting with 0 to 150 ms neural data
1000.0
R2: 0.9088683259322539
Predicting with 0 to 200 ms neural data
1000.0
R2: 0.9087965951979173
Predicting with 0 to 250 ms neural data
1000.0
R2: 0.9129067451739092
Predicting with 0 to 300 ms neural data
1000.0
R2: 0.9103922051970267
Predicting with 0 to 350 ms neural data
1000.0
R2: 0.9078526741472736
Predicting with 0 to 400 ms neural data
1000.0
R2: 0.9060868831313249
Predicting with 0 to 450 ms neural data
1000.0
R2: 0.9038948361614509
Predicting with 0 to 500 ms neural data
1000.0
R2: 0.9040248525214972
Predicting with -50 to 0 ms neural data
1000.0
R2: 0.8227071406957723
Predicting with -50 to 50 ms neural data
1000.0
R2: 0.8916823327421969
Predicting with -50 to 100 ms neural data
1000.0
R2: 0.9056289161895521
Predicting with -50 

  plt.tight_layout()


Predicting with 0 to 0 ms neural data
100.0
R2: 0.7723273288822291
Predicting with 0 to 50 ms neural data
1000.0
R2: 0.8323046488817147
Predicting with 0 to 100 ms neural data
1000.0
R2: 0.8443339097238964
Predicting with 0 to 150 ms neural data
1000.0
R2: 0.8564905330080382
Predicting with 0 to 200 ms neural data
1000.0
R2: 0.8581311832869774
Predicting with 0 to 250 ms neural data
1000.0
R2: 0.8610481760805787
Predicting with 0 to 300 ms neural data
1000.0
R2: 0.8606102018942241
Predicting with 0 to 350 ms neural data
1000.0
R2: 0.8576305667218663
Predicting with 0 to 400 ms neural data
1000.0
R2: 0.8547165721480676
Predicting with 0 to 450 ms neural data
1000.0
R2: 0.8526656341241657
Predicting with 0 to 500 ms neural data
1000.0
R2: 0.8525135292489442
Predicting with -50 to 0 ms neural data
1000.0
R2: 0.7805135568467356
Predicting with -50 to 50 ms neural data
1000.0
R2: 0.8388530530127014
Predicting with -50 to 100 ms neural data
1000.0
R2: 0.8501969588130162
Predicting with -50 t

  plt.tight_layout()
