In [None]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from nlb_tools.nwb_interface import NWBDataset
def unit_vector(vector):
    """Scale `vector` by its Euclidean norm so the result has unit length."""
    length = np.linalg.norm(vector)
    return vector / length

def angle_between(v1, v2):
    """Return the angle, in radians, between the vectors 'v1' and 'v2'.

    Examples::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966 (90 deg)
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0 (0 deg)
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793 (180 deg)
    """
    cosine = np.dot(unit_vector(v1), unit_vector(v2))
    # Clip to [-1, 1] so rounding error cannot push the cosine outside
    # the domain of arccos.
    return np.arccos(np.clip(cosine, -1.0, 1.0))

#Import standard packages
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy import io
from scipy import stats
import pickle

# If you would prefer to load the '.h5' example file rather than the '.pickle' example file. You need the deepdish package
# import deepdish as dd 

#Import function to get the covariate matrix that includes spike history from previous bins
from Neural_Decoding.preprocessing_funcs import get_spikes_with_history

#Import metrics
from Neural_Decoding.metrics import get_R2
from Neural_Decoding.metrics import get_rho

#Import decoder functions
from Neural_Decoding.decoders import WienerCascadeDecoder
from Neural_Decoding.decoders import WienerFilterDecoder
from Neural_Decoding.decoders import DenseNNDecoder
from Neural_Decoding.decoders import SimpleRNNDecoder
from Neural_Decoding.decoders import GRUDecoder
from Neural_Decoding.decoders import LSTMDecoder
from Neural_Decoding.decoders import XGBoostDecoder
from Neural_Decoding.decoders import SVRDecoder

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

def get_sses_pred(y_test,y_test_pred):
    """Column-wise sum of squared errors between predictions and targets."""
    residual = y_test_pred - y_test
    return np.sum(np.square(residual), axis=0)

def get_sses_mean(y_test):
    """Column-wise sum of squared deviations of `y_test` from its column means."""
    centered = y_test - np.mean(y_test, axis=0)
    return np.sum(np.square(centered), axis=0)

def nans(shape, dtype=float):
    """Allocate an array of the given shape and dtype with every entry set to NaN."""
    out = np.empty(shape, dtype)
    out[...] = np.nan
    return out

def vector_reject(u,v):
    """Return the part of `u` that is orthogonal to `v` (the rejection of u from v).

    The orthogonal projector onto span(v) is built as P = v v^T / (v . v)
    and the projected component P @ u is subtracted from `u`. This equals
    u - (u . v / |v|^2) v.
    """
    projector = np.outer(v, v.T) / (v @ v.T)
    return u - projector @ u

def calc_proj_matrix(A):
    """Return the orthogonal projector onto the column space of `A`.

    For a matrix with linearly independent columns this is the familiar
    A (A^T A)^-1 A^T. It is computed as A @ pinv(A), which gives the same
    matrix in that case but stays well defined when the columns of `A` are
    linearly dependent (where A^T A is singular and cannot be inverted).

    Parameters
    ----------
    A : 2-D array whose columns span the subspace to project onto.

    Returns
    -------
    Square array with as many rows/columns as `A` has rows.
    """
    return A @ np.linalg.pinv(A)
def calc_proj(b, A):
    """Project the rows of `b` onto the column space of `A`.

    The projector from `calc_proj_matrix(A)` is applied to `b.T`, so the
    projected vectors are returned as columns (the transpose of `b`'s layout).
    """
    return calc_proj_matrix(A) @ b.T

# Single Lag

In [None]:
# Location of the NWB recording: <foldername><monkey>_COactpas_TD.nwb
foldername = "~/area2_population_analysis/s1-kinematics/actpas_NWB/"
monkey = "Han_20171207"
filename = foldername + monkey + "_COactpas_TD.nwb"

# Load the session, then rebin and smooth it in place.
dataset_5ms = NWBDataset(filename, split_heldout=False)
# presumably rebins to 5 ms (the variable name suggests so) — confirm against nlb_tools
dataset_5ms.resample(5)
# Adds the smoothed-spike field read later as `spikes_smth_40`;
# the 40 is presumably the smoothing kernel width in ms — TODO confirm.
dataset_5ms.smooth_spk(40, name='smth_40')
# dataset_5ms.smooth_spk(20, name='smth_20')
bin_width = dataset_5ms.bin_width
print(bin_width)

In [None]:
# Hand speed: square root of the summed squared 'hand_vel' components at each
# time step, reshaped to a single column and stored as the continuous field 'speed'.
speed = np.sqrt(np.sum(dataset_5ms.data['hand_vel'][:].T**2,axis=0)).to_numpy().reshape((-1,1))
dataset_5ms.add_continuous_data(speed,'speed')

In [None]:
n_dims = 20 # for PCA

# Trial selection. Both masks drop trials whose split is 'none';
# `ctr_hold_bump` separates bump (passive) trials from the rest (active).
active_mask = (~dataset_5ms.trial_info.ctr_hold_bump) & (dataset_5ms.trial_info.split != 'none')
passive_mask = (dataset_5ms.trial_info.ctr_hold_bump) & (dataset_5ms.trial_info.split != 'none')


trial_mask = active_mask
n_trials = dataset_5ms.trial_info.loc[trial_mask].shape[0]
print(n_trials,'trials')
n_neurons = dataset_5ms.data.spikes.shape[1]
print(n_neurons,'neurons')

# PCA is fit on every time step of the smoothed spikes that has no NaN entry.
all_data = np.array(dataset_5ms.data.spikes_smth_40)
print(all_data.shape)
valid_rows = ~np.isnan(all_data).any(axis=1)
data_for_pca = all_data[valid_rows]
print(data_for_pca.shape)

scaler = StandardScaler()
X = scaler.fit_transform(data_for_pca)
pca = PCA(n_components=n_dims)
# Fit in place; `X` stays the standardised data instead of being rebound
# to the estimator that `fit` returns.
pca.fit(X)

# Project all valid rows in one call; rows containing NaN stay NaN so the
# result lines up row-for-row with the original continuous data.
PCA_data = nans([all_data.shape[0],n_dims])
PCA_data[valid_rows] = pca.transform(X)
print(PCA_data.shape)
dataset_5ms.add_continuous_data(PCA_data,'PCA')
print('PCA total var explained:',sum(pca.explained_variance_ratio_))

#make dictionary for trial condition (reaching directions) for Stratified CV
cond_directions = [0,45,90,135,180,225,270,315]
active_info = dataset_5ms.trial_info.loc[trial_mask]
active_trials_idx = np.array(active_info['trial_id'])
# Positional row indices of the active trials for each direction (kept for
# any later cell that reads `cond_dir_idx`).
cond_dir_idx = [
    np.where((dataset_5ms.trial_info['cond_dir'] == direction) & (dataset_5ms.trial_info['ctr_hold_bump'] == False) & \
           (dataset_5ms.trial_info['split'] != 'none'))[0]
    for direction in cond_directions
]
# cond_dict[k] is the index (0-7) of the reach direction of the k-th selected
# trial, or NaN if its direction is not one of `cond_directions`. Labels are
# read from `cond_dir` of the selected trials themselves, so they do not rely
# on trial_id values coinciding with row positions in trial_info.
active_dirs = active_info['cond_dir'].to_numpy()
cond_dict = nans([n_trials])
for cond, direction in enumerate(cond_directions):
    cond_dict[active_dirs == direction] = cond
print(cond_dict)

In [None]:
# # Prepare for PCA plotting

# active_data = dataset_5ms.make_trial_data(align_field='move_onset_time', align_range=(-100, 500), ignored_trials=~trial_mask)
# active_trials_pca = nans([n_trials,n_timepoints,n_dims])
# i = 0
# for idx, trial in active_data.groupby('trial_id'):
#     active_trials_pca[i,:,:]=trial.PCA.to_numpy()
#     i+=1
# print(active_trials_pca.shape)

# plot_dir = np.array([0,45,90,135,180,225,270,315]) # limit plot directions to reduce cluttering
# directions = np.array([0,45,90,135,180,225,270,315])
# pred_range = (-100, 500)
# x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)

# # define some useful time points
# move_idx=0
# ret_idx = 200

# import matplotlib as mpl
# cmap = plt.get_cmap('coolwarm',len(plot_dir))
# custom_palette = [mpl.colors.rgb2hex(cmap(i)) for i in range(len(plot_dir))]

# plot_dims = 10

# fig,ax=plt.subplots(plot_dims,1,figsize=(10,20))
# for i in range(plot_dims):
#     for j in range(len(plot_dir)):
#         color = custom_palette[j]
#         dir_idx = np.argwhere(directions == plot_dir[j])[0]
#         cond_mean_proj = np.mean(active_trials_pca[np.argwhere(cond_dict==dir_idx).flatten(),:,:], axis = 0)[:,i] 
#         pca_mean = np.mean(active_data.PCA.to_numpy(),axis = 0)[i]
#         ax[i].plot(x_axis,cond_mean_proj - pca_mean,linewidth=2.25,color = color,label = plot_dir[j])
        
#         ax[i].axvline(move_idx, color='k',linewidth = .5)
#         ax[i].axvline(ret_idx, color='k',linewidth = .5)
# #         ax[i].set_xlim([0,T])
#         ax[i].set_ylim([-6, 6])
#         ax[i].axhline(0,color ='k',ls = '--')
#         if i<plot_dims-1:
#             ax[i].set_xticks([])
#         else:
#             ax[i].set_xlabel('Time (ms)')
            
#         ax[i].set_yticks([])
#         ax[i].set_ylabel('Dim. '+str(i+1))

#     ax[0].set_title('PCA Projections')
    
# plt.legend(bbox_to_anchor = (1, 1), loc = 'upper left')

In [None]:
def process_train_test(X,y,training_set,test_set):
    """Split trial-structured arrays into train/test sets and normalise them.

    `X` and `y` are indexed as (trial, time, feature). The selected trials are
    flattened to (trial*time, feature). Predictors are z-scored with the
    NaN-ignoring mean and std of the training rows (a zero std is replaced by 1
    to avoid division by zero); targets are centred on the training mean.

    Returns X_flat_train, X_flat_test, y_train, y_test.
    """
    def _flatten(arr, trials):
        # Select trials, then merge the trial and time axes.
        picked = arr[trials,:,:]
        return picked.reshape(picked.shape[0]*picked.shape[1], picked.shape[2])

    X_flat_train = _flatten(X, training_set)
    X_flat_test = _flatten(X, test_set)
    y_train = _flatten(y, training_set)
    y_test = _flatten(y, test_set)

    # Normalisation statistics come from the training rows only.
    feat_mean = np.nanmean(X_flat_train, axis=0)
    feat_std = np.nanstd(X_flat_train, axis=0)
    feat_std[feat_std == 0] = 1

    X_flat_train = (X_flat_train - feat_mean) / feat_std
    X_flat_test = (X_flat_test - feat_mean) / feat_std

    target_mean = np.mean(y_train, axis=0)
    return X_flat_train, X_flat_test, y_train - target_mean, y_test - target_mean

In [None]:
def fit_and_predict(dataset, trial_mask, align_field, align_range, lag, x_field, y_field, cond_labels=None):
    """Fit a ridge decoder from lagged neural data to kinematics and score it.

    Two trial-aligned frames are cut from `dataset`: the decoding target
    (`y_field`) over `align_range`, and the predictors (`x_field`) over the
    same window shifted by `lag`. A grid-searched Ridge model fit on all
    selected trials provides the decoder weights and per-sample predictions;
    a separate stratified 5-fold cross-validation over trials provides R2.

    Parameters
    ----------
    dataset : object providing `make_trial_data`, `bin_width` and `_make_midx`.
    trial_mask : boolean mask over trials; trials where it is False are ignored.
    align_field : name of the trial event the windows are aligned to.
    align_range : (start, stop) window for the target (presumably in ms, like
        `dataset.bin_width` — confirm against nlb_tools).
    lag : offset added to both ends of `align_range` for the predictor window.
    x_field : predictor field name.
    y_field : target field name; must have two columns (they are labelled
        'x' and 'y' in the returned frame).
    cond_labels : optional per-trial class labels used to stratify the folds,
        one per selected trial in trial order. Defaults to the notebook-level
        `cond_dict`.

    Returns
    -------
    R2 : cross-validated R2 pooled over both target columns.
    coef : coefficients of the best Ridge model fit on all selected trials.
    vel_df : target frame with the full-data predictions appended as 'pred_vel'.

    Raises
    ------
    ValueError
        If `cond_labels` does not have one entry per extracted trial.
    """
    if cond_labels is None:
        # Backward-compatible default: labels prepared at notebook level.
        cond_labels = cond_dict

    # Extract kinematics from selected trials over the requested window
    vel_df = dataset.make_trial_data(align_field=align_field, align_range=align_range, ignored_trials=~trial_mask)
    # Extract neural data from the same trials over the lag-shifted window
    lag_align_range = (align_range[0] + lag, align_range[1] + lag)
    rates_df = dataset.make_trial_data(align_field=align_field, align_range=lag_align_range, ignored_trials=~trial_mask)

    n_trials = rates_df['trial_id'].nunique()
    n_timepoints = int((align_range[1] - align_range[0])/dataset.bin_width)
    n_neurons = rates_df[x_field].shape[1]

    if len(cond_labels) != n_trials:
        raise ValueError(
            'cond_labels has %d entries but %d trials were extracted; '
            'pass labels that match trial_mask' % (len(cond_labels), n_trials))

    # Model fit on every selected trial: source of the returned weights and of
    # the predictions appended to vel_df (not used for the R2 score).
    lr_all = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)})
    rates_array = rates_df[x_field].to_numpy()
    vel_array = vel_df[y_field].to_numpy()
    lr_all.fit(rates_array, vel_array)
    pred_vel = lr_all.predict(rates_array)
    vel_df = pd.concat([vel_df, pd.DataFrame(pred_vel, columns=dataset._make_midx('pred_vel', ['x', 'y'], 2))], axis=1)
#     print(lr_all.best_params_['alpha'])

    # Restore the (trial, time, feature) layout so folds are split by trial.
    rates_array = rates_array.reshape(n_trials, n_timepoints, n_neurons)
    vel_array = vel_array.reshape(n_trials, n_timepoints, 2)

    skf = StratifiedKFold(n_splits=5,shuffle=True,random_state = 42)
    true_concat = nans([n_trials*n_timepoints,2])
    pred_concat = nans([n_trials*n_timepoints,2])
    trial_save_idx = 0
    for training_set, test_set in skf.split(range(0,n_trials),cond_labels):
        #split training and testing by trials
        X_train, X_test, y_train, y_test = process_train_test(rates_array,vel_array,training_set,test_set)
        lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)})
        lr.fit(X_train, y_train)
        y_test_predicted = lr.predict(X_test)

        # Append this fold's held-out targets and predictions.
        n = y_test_predicted.shape[0]
        true_concat[trial_save_idx:trial_save_idx+n,:] = y_test
        pred_concat[trial_save_idx:trial_save_idx+n,:] = y_test_predicted
        trial_save_idx += n

    # R2 pooled across both output columns and all held-out samples.
    sses =get_sses_pred(true_concat,pred_concat)
    sses_mean=get_sses_mean(true_concat)
    R2 =1-np.sum(sses)/np.sum(sses_mean)
    print('R2:',R2)
    return R2, lr_all.best_estimator_.coef_, vel_df

In [None]:
def sub_and_predict(dataset, trial_mask, align_field, align_range, lag, x_field, y_field, weights, cond_labels=None):
    """Project decoding dimensions out of the neural data, then fit and score a decoder.

    Same procedure as `fit_and_predict`, except that before any fitting the
    component of the predictors lying in the span of the rows of `weights`
    is subtracted (via `calc_proj`), so the decoder can only use what remains.

    Parameters
    ----------
    dataset : object providing `make_trial_data`, `bin_width` and `_make_midx`.
    trial_mask : boolean mask over trials; trials where it is False are ignored.
    align_field : name of the trial event the windows are aligned to.
    align_range : (start, stop) window for the target (presumably in ms, like
        `dataset.bin_width` — confirm against nlb_tools).
    lag : offset added to both ends of `align_range` for the predictor window.
    x_field : predictor field name.
    y_field : target field name; must have two columns (they are labelled
        'x' and 'y' in the returned frame).
    weights : array whose rows are the predictor-space directions to remove
        (one column per predictor feature).
    cond_labels : optional per-trial class labels used to stratify the folds,
        one per selected trial in trial order. Defaults to the notebook-level
        `cond_dict`.

    Returns
    -------
    R2 : cross-validated R2 pooled over both target columns.
    coef : coefficients of the best Ridge model fit on all selected trials
        (after projection).
    vel_df : target frame with the full-data predictions appended as 'pred_vel'.

    Raises
    ------
    ValueError
        If `cond_labels` does not have one entry per extracted trial.
    """
    if cond_labels is None:
        # Backward-compatible default: labels prepared at notebook level.
        cond_labels = cond_dict

    # Extract kinematics from selected trials over the requested window
    vel_df = dataset.make_trial_data(align_field=align_field, align_range=align_range, ignored_trials=~trial_mask)
    # Extract neural data from the same trials over the lag-shifted window
    lag_align_range = (align_range[0] + lag, align_range[1] + lag)
    rates_df = dataset.make_trial_data(align_field=align_field, align_range=lag_align_range, ignored_trials=~trial_mask)

    n_trials = rates_df['trial_id'].nunique()
    n_timepoints = int((align_range[1] - align_range[0])/dataset.bin_width)
    n_neurons = rates_df[x_field].shape[1]

    if len(cond_labels) != n_trials:
        raise ValueError(
            'cond_labels has %d entries but %d trials were extracted; '
            'pass labels that match trial_mask' % (len(cond_labels), n_trials))

    # Remove the part of each sample that lies in the span of the given
    # decoding dimensions (calc_proj returns samples as columns, hence the .T).
    raw_rates = rates_df[x_field].to_numpy()
    rates_array = raw_rates - calc_proj(raw_rates,weights.T).T
    vel_array = vel_df[y_field].to_numpy()

    # Model fit on every selected trial: source of the returned weights and of
    # the predictions appended to vel_df (not used for the R2 score).
    lr_all = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)})
    lr_all.fit(rates_array, vel_array)
    pred_vel = lr_all.predict(rates_array)
    vel_df = pd.concat([vel_df, pd.DataFrame(pred_vel, columns=dataset._make_midx('pred_vel', ['x', 'y'], 2))], axis=1)
#     print(lr_all.best_params_['alpha'])

    # Restore the (trial, time, feature) layout so folds are split by trial.
    rates_array = rates_array.reshape(n_trials, n_timepoints, n_neurons)
    vel_array = vel_array.reshape(n_trials, n_timepoints, 2)

    skf = StratifiedKFold(n_splits=5,shuffle=True,random_state = 42)
    true_concat = nans([n_trials*n_timepoints,2])
    pred_concat = nans([n_trials*n_timepoints,2])
    trial_save_idx = 0
    for training_set, test_set in skf.split(range(0,n_trials),cond_labels):
        #split training and testing by trials
        X_train, X_test, y_train, y_test = process_train_test(rates_array,vel_array,training_set,test_set)
        lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 1, 6)})
        lr.fit(X_train, y_train)
        y_test_predicted = lr.predict(X_test)

        # Append this fold's held-out targets and predictions.
        n = y_test_predicted.shape[0]
        true_concat[trial_save_idx:trial_save_idx+n,:] = y_test
        pred_concat[trial_save_idx:trial_save_idx+n,:] = y_test_predicted
        trial_save_idx += n

    # R2 pooled across both output columns and all held-out samples.
    sses =get_sses_pred(true_concat,pred_concat)
    sses_mean=get_sses_mean(true_concat)
    R2 =1-np.sum(sses)/np.sum(sses_mean)
    print('R2:',R2)
    return R2, lr_all.best_estimator_.coef_, vel_df

### Timing plots

In [None]:
# Number of recorded units = number of columns of the spikes field.
n_neurons = dataset_5ms.data.spikes.shape[1]
print(n_neurons,'neurons')

# Trial masks for the timing plots. Both exclude trials whose split is 'none';
# `ctr_hold_bump` separates bump (passive) trials from the rest (active).
active_mask = (~dataset_5ms.trial_info.ctr_hold_bump) & (dataset_5ms.trial_info.split != 'none')
passive_mask = (dataset_5ms.trial_info.ctr_hold_bump) & (dataset_5ms.trial_info.split != 'none')

In [None]:
# Force (y component) on every passive trial, aligned to 'bump_time'.
plot_range = (-40, 100)
# One x value per time bin inside plot_range.
x_axis = np.arange(plot_range[0], plot_range[1], dataset_5ms.bin_width)
passive_df = dataset_5ms.make_trial_data(align_field='bump_time', align_range=plot_range, ignored_trials=~passive_mask)

plt.figure(figsize=(10,6))
# One thin line per trial.
for _, trial in passive_df.groupby('trial_id'):
    plt.plot(x_axis, trial.force['y'], color='red', linewidth=0.5)
plt.xlabel('Time after bump time (ms)')
plt.ylabel('Force to manipulandum (N)')
# Dashed vertical line marks the alignment event (t = 0).
plt.axvline(0, color = 'k',linestyle = '--')
plt.title('Force aligned to bump_time')

In [None]:
# Hand speed on every active (black) and passive (red) trial, both aligned
# to 'move_onset_time'.
plot_range = (-40, 100)
# One x value per time bin inside plot_range.
x_axis = np.arange(plot_range[0], plot_range[1], dataset_5ms.bin_width)
active_df = dataset_5ms.make_trial_data(align_field='move_onset_time', align_range=plot_range, ignored_trials=~active_mask)
passive_df = dataset_5ms.make_trial_data(align_field='move_onset_time', align_range=plot_range, ignored_trials=~passive_mask)

plt.figure(figsize=(10,6))
# One thin line per trial.
for _, trial in active_df.groupby('trial_id'):
    plt.plot(x_axis,trial.speed, color='k', linewidth=0.5)
for _, trial in passive_df.groupby('trial_id'):
    plt.plot(x_axis, trial.speed, color='red', linewidth=0.5)
plt.xlabel('Time after movement onset (ms)')
plt.ylabel('Hand speed (cm/s)')
# Dashed vertical line marks the alignment event (t = 0).
plt.axvline(0, color = 'k',linestyle = '--')
# The title names the event actually used for alignment above.
plt.title('Speed aligned to move_onset_time')
# plt.axvline(120, color = 'k',linestyle = '--')

In [None]:
# Population firing rate around 'move_onset_time' for active vs passive trials.
plot_range = (-40, 100)
# One x value per time bin inside plot_range.
x_axis = np.arange(plot_range[0], plot_range[1], dataset_5ms.bin_width)
active_df = dataset_5ms.make_trial_data(align_field='move_onset_time', align_range=plot_range, ignored_trials=~active_mask)
passive_df = dataset_5ms.make_trial_data(align_field='move_onset_time', align_range=plot_range, ignored_trials=~passive_mask)

# For each trial, total spike count across all neurons in every time bin.
active_trials_spikes = []
for _, trial in active_df.groupby('trial_id'):
    active_trials_spikes.append(np.sum(trial.spikes,axis=1))
passive_trials_spikes = []
for _, trial in passive_df.groupby('trial_id'):
    passive_trials_spikes.append(np.sum(trial.spikes,axis=1))

# Sum over trials, divide by bin width and scale by 1000 (a rate per second,
# assuming bin_width is in ms — confirm), then average over trials and neurons.
plt.plot(x_axis,np.sum(active_trials_spikes,axis = 0)/dataset_5ms.bin_width*1000/len(active_trials_spikes)/n_neurons,"-o",color = 'k',label = 'Active')
plt.plot(x_axis,np.sum(passive_trials_spikes,axis = 0)/dataset_5ms.bin_width*1000/len(passive_trials_spikes)/n_neurons,"-o",color = 'red',label = 'Passive')
plt.legend()
plt.title('Peristimulus')
plt.ylabel('Trial-average, neuron-average, FR (per sec)')
plt.xlabel('Time after movement onset (ms)')
# Dashed vertical line marks the alignment event (t = 0).
plt.axvline(0, color = 'k',linestyle = '--')    

## with Neurons

In [None]:
# Configuration for decoding hand velocity from smoothed single-neuron activity.
# Lags swept by the cells below (plotted there on a 'Time lag (ms)' axis).
lag_axis = np.arange(-300,300,20)
x_field = 'spikes_smth_40'
y_field ='hand_vel'
trial_mask = active_mask

# Prepare for plotting
plot_dir = [0.0, 90.0, 180.0, 270.0] # limit plot directions to reduce cluttering
plot_dim = 'x' # plot x velocity
colors = ['red', 'blue', 'green', 'orange']

# Figure output directory, rooted at the user's home like the data folder so
# the notebook is not tied to one account; created if missing so savefig
# does not fail on a fresh checkout.
figDir = os.path.expanduser("~/area2_population_analysis/figures/neurons/act/")
os.makedirs(figDir, exist_ok=True)

# Predictor dimensionality: one feature per neuron.
dim = n_neurons

In [None]:
pred_range = (-100, 500)
label = '_whole_'
x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)

# Lag sweep: for every lag keep the cross-validated R2 and the (2 x dim)
# weights of the decoder fit on all selected trials.
curr_r2_array = nans([len(lag_axis)])
curr_coef_array = nans([len(lag_axis),2,dim])
for i, lag in enumerate(lag_axis):
    r2, coef, _ = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag, x_field, y_field)
    curr_r2_array[i] = r2
    curr_coef_array[i,:,:] = coef

# Refit at the best lag to get per-sample predictions for plotting.
idx_max = np.argmax(curr_r2_array)
time_max = lag_axis[idx_max]
_, _, vel_df = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field)

# Measured velocity, one line per trial, coloured by reach direction.
for trial_dir, color in zip(plot_dir, colors):
    cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
    for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
        plt.plot(x_axis, trial.hand_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + 'true.png', dpi = 'figure')
plt.close()

# Decoded velocity at the best lag, same layout.
for trial_dir, color in zip(plot_dir, colors):
    cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
    for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
        plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'_pred.png', dpi = 'figure')
plt.close()

# R2 as a function of lag; the window in the title is taken from pred_range
# so it cannot drift from the window actually decoded.
plt.plot(lag_axis, curr_r2_array)
plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
plt.legend()
plt.title('R2 score predicting hand velocity [{},{}]'.format(*pred_range))
plt.xlabel('Time lag (ms)')
plt.ylabel('R2')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'.png', dpi = 'figure')
plt.close()

# ang_dist_to_max = nans([len(lag_axis)])
# for i in range(0, len(curr_coef_array)):
#     ang_dist_to_max[i] = math.degrees(angle_between(curr_coef_array[i,0,:],curr_coef_array[idx_max,0,:]))
# plt.scatter(lag_axis, ang_dist_to_max)
# plt.title('Angular distance to X-vel decoding dim at t_max')
# plt.xlabel('Time lag (ms)')
# plt.ylabel('Angular distance (degrees)')
# plt.show()

# Iteratively remove the best decoding dimensions: each round repeats the lag
# sweep with the accumulated `weights` projected out of the neural data, then
# adds the new best-lag weights to the set to remove.
# (Loop variable deliberately not called `iter`, which would shadow the builtin
# for the rest of the kernel session.)
weights = curr_coef_array[idx_max,:,:]
for proj_iter in range(0,7):
    #subtract predictions with primary decoding dimensions (at time with max R2)
    sub_coef_array = nans([len(lag_axis),2,dim])
    sub_r2_array = nans([len(lag_axis)])

    for i, lag in enumerate(lag_axis):
        r2, coef,_ = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag,x_field,y_field,weights)
        sub_r2_array[i] = r2
        sub_coef_array[i,:,:] = coef

    plt.plot(lag_axis,sub_r2_array)
    plt.title('R2 score projecting out #'+ str(proj_iter+1) +' t_max dim')
    idx_max = np.argmax(sub_r2_array)
    time_max = lag_axis[idx_max]
    plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
    plt.legend()
    plt.xlabel('Time lag (ms)')
    plt.ylabel('R2')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'.png', dpi = 'figure')
    plt.close()

    # Decoded velocity at this round's best lag.
    _, _, vel_df = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field,weights)
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel('x-vel')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'_pred.png', dpi = 'figure')
    plt.close()
#     plt.plot(lag_axis,np.subtract(sub_r2_array,curr_r2_array))
#     plt.axhline(0,color = 'k',linestyle='--')
#     plt.title('R2 difference after projecting out t_max dim')
#     plt.xlabel('Time lag (ms)')
#     plt.ylabel('R2 difference')
#     plt.show()
#     curr_r2_array = sub_r2_array

    #stack the decoding dimensions to be projected out
    weights = np.vstack((weights,sub_coef_array[idx_max,:,:]))

In [None]:
pred_range = (-100, 120)
label = '_long_'
x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)

# Lag sweep: for every lag keep the cross-validated R2 and the (2 x dim)
# weights of the decoder fit on all selected trials.
curr_r2_array = nans([len(lag_axis)])
curr_coef_array = nans([len(lag_axis),2,dim])
for i, lag in enumerate(lag_axis):
    r2, coef, _ = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag, x_field, y_field)
    curr_r2_array[i] = r2
    curr_coef_array[i,:,:] = coef

# Refit at the best lag to get per-sample predictions for plotting.
idx_max = np.argmax(curr_r2_array)
time_max = lag_axis[idx_max]
_, _, vel_df = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field)

# Measured velocity, one line per trial, coloured by reach direction.
for trial_dir, color in zip(plot_dir, colors):
    cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
    for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
        plt.plot(x_axis, trial.hand_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + 'true.png', dpi = 'figure')
plt.close()

# Decoded velocity at the best lag, same layout.
for trial_dir, color in zip(plot_dir, colors):
    cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
    for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
        plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'_pred.png', dpi = 'figure')
plt.close()

# R2 as a function of lag. The window in the title is taken from pred_range;
# the previous hard-coded '[0,120]' did not match the (-100, 120) window used.
plt.plot(lag_axis, curr_r2_array)
plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
plt.legend()
plt.title('R2 score predicting hand velocity [{},{}]'.format(*pred_range))
plt.xlabel('Time lag (ms)')
plt.ylabel('R2')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'.png', dpi = 'figure')
plt.close()

# Iteratively remove the best decoding dimensions: each round repeats the lag
# sweep with the accumulated `weights` projected out of the neural data, then
# adds the new best-lag weights to the set to remove.
# (Loop variable deliberately not called `iter`, which would shadow the builtin
# for the rest of the kernel session.)
weights = curr_coef_array[idx_max,:,:]
for proj_iter in range(0,3):
    #subtract predictions with primary decoding dimensions (at time with max R2)
    sub_coef_array = nans([len(lag_axis),2,dim])
    sub_r2_array = nans([len(lag_axis)])

    for i, lag in enumerate(lag_axis):
        r2, coef,_ = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag,x_field,y_field,weights)
        sub_r2_array[i] = r2
        sub_coef_array[i,:,:] = coef

    plt.plot(lag_axis,sub_r2_array)
    plt.title('R2 score projecting out #'+ str(proj_iter+1) +' t_max dim')
    idx_max = np.argmax(sub_r2_array)
    time_max = lag_axis[idx_max]
    plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
    plt.legend()
    plt.xlabel('Time lag (ms)')
    plt.ylabel('R2')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'.png', dpi = 'figure')
    plt.close()


    # Decoded velocity at this round's best lag.
    _, _, vel_df = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field,weights)
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel('x-vel')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'_pred.png', dpi = 'figure')
    plt.close()

    #stack the decoding dimensions to be projected out
    weights = np.vstack((weights,sub_coef_array[idx_max,:,:]))

In [None]:
pred_range = (380, 500)
label = '_late_'
x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)

# Lag sweep: for every lag keep the cross-validated R2 and the (2 x dim)
# weights of the decoder fit on all selected trials.
curr_r2_array = nans([len(lag_axis)])
curr_coef_array = nans([len(lag_axis),2,dim])
for i, lag in enumerate(lag_axis):
    r2, coef, _ = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag, x_field, y_field)
    curr_r2_array[i] = r2
    curr_coef_array[i,:,:] = coef

# Refit at the best lag to get per-sample predictions for plotting.
idx_max = np.argmax(curr_r2_array)
time_max = lag_axis[idx_max]
_, _, vel_df = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field)

# Measured velocity, one line per trial, coloured by reach direction.
for trial_dir, color in zip(plot_dir, colors):
    cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
    for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
        plt.plot(x_axis, trial.hand_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + 'true.png', dpi = 'figure')
plt.close()

# Decoded velocity at the best lag, same layout.
for trial_dir, color in zip(plot_dir, colors):
    cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
    for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
        plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'_pred.png', dpi = 'figure')
plt.close()

# R2 as a function of lag; the window in the title is taken from pred_range
# so it cannot drift from the window actually decoded.
plt.plot(lag_axis, curr_r2_array)
plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
plt.legend()
plt.title('R2 score predicting hand velocity [{},{}]'.format(*pred_range))
plt.xlabel('Time lag (ms)')
plt.ylabel('R2')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'.png', dpi = 'figure')
plt.close()

# Iteratively remove the best decoding dimensions: each round repeats the lag
# sweep with the accumulated `weights` projected out of the neural data, then
# adds the new best-lag weights to the set to remove.
# (Loop variable deliberately not called `iter`, which would shadow the builtin
# for the rest of the kernel session.)
weights = curr_coef_array[idx_max,:,:]
for proj_iter in range(0,3):
    #subtract predictions with primary decoding dimensions (at time with max R2)
    sub_coef_array = nans([len(lag_axis),2,dim])
    sub_r2_array = nans([len(lag_axis)])

    for i, lag in enumerate(lag_axis):
        r2, coef,_ = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag,x_field,y_field,weights)
        sub_r2_array[i] = r2
        sub_coef_array[i,:,:] = coef

    plt.plot(lag_axis,sub_r2_array)
    plt.title('R2 score projecting out #'+ str(proj_iter+1) +' t_max dim')
    idx_max = np.argmax(sub_r2_array)
    time_max = lag_axis[idx_max]
    plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
    plt.legend()
    plt.xlabel('Time lag (ms)')
    plt.ylabel('R2')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'.png', dpi = 'figure')
    plt.close()


    # Decoded velocity at this round's best lag.
    _, _, vel_df = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field,weights)
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel('x-vel')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'_pred.png', dpi = 'figure')
    plt.close()

    #stack the decoding dimensions to be projected out
    weights = np.vstack((weights,sub_coef_array[idx_max,:,:]))

## with PCA

In [None]:
# Configuration for decoding hand velocity from the PCA projections.
# NOTE: `trial_mask` is not reset here; the cells below use whatever an
# earlier cell assigned.
x_field = 'PCA'
y_field ='hand_vel'
# Lags swept by the cells below (plotted there on a 'Time lag (ms)' axis).
lag_axis = np.arange(-300,300,20)

# Prepare for plotting
plot_dir = [0.0, 90.0, 180.0, 270.0] # limit plot directions to reduce cluttering
plot_dim = 'x' # plot x velocity
colors = ['red', 'blue', 'green', 'orange']

# Figure output directory, rooted at the user's home like the data folder so
# the notebook is not tied to one account; created if missing so savefig
# does not fail on a fresh checkout.
figDir = os.path.expanduser("~/area2_population_analysis/figures/PCA/act/")
os.makedirs(figDir, exist_ok=True)

# Predictor dimensionality: one feature per principal component.
dim = n_dims

In [None]:
pred_range = (-100, 500)
label = '_whole_'
x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)

# Lag sweep: for every lag keep the cross-validated R2 and the (2 x dim)
# weights of the decoder fit on all selected trials.
curr_r2_array = nans([len(lag_axis)])
curr_coef_array = nans([len(lag_axis),2,dim])
for i, lag in enumerate(lag_axis):
    r2, coef, _ = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag, x_field, y_field)
    curr_r2_array[i] = r2
    curr_coef_array[i,:,:] = coef

# Refit at the best lag to get per-sample predictions for plotting.
idx_max = np.argmax(curr_r2_array)
time_max = lag_axis[idx_max]
_, _, vel_df = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field)

# Decoded velocity at the best lag, one line per trial, coloured by reach direction.
for trial_dir, color in zip(plot_dir, colors):
    cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
    for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
        plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'_pred.png', dpi = 'figure')
plt.close()

# R2 as a function of lag; the window in the title is taken from pred_range
# so it cannot drift from the window actually decoded.
plt.plot(lag_axis, curr_r2_array)
plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
plt.legend()
plt.title('R2 score predicting hand velocity [{},{}]'.format(*pred_range))
plt.xlabel('Time lag (ms)')
plt.ylabel('R2')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'.png', dpi = 'figure')
plt.close()

# Iteratively remove the best decoding dimensions: each round repeats the lag
# sweep with the accumulated `weights` projected out of the predictors, then
# adds the new best-lag weights to the set to remove.
# (Loop variable deliberately not called `iter`, which would shadow the builtin
# for the rest of the kernel session.)
weights = curr_coef_array[idx_max,:,:]
for proj_iter in range(0,3):
    #subtract predictions with primary decoding dimensions (at time with max R2)
    sub_coef_array = nans([len(lag_axis),2,dim])
    sub_r2_array = nans([len(lag_axis)])

    for i, lag in enumerate(lag_axis):
        r2, coef,_ = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag,x_field,y_field,weights)
        sub_r2_array[i] = r2
        sub_coef_array[i,:,:] = coef

    plt.plot(lag_axis,sub_r2_array)
    plt.title('R2 score projecting out #'+ str(proj_iter+1) +' t_max dim')
    idx_max = np.argmax(sub_r2_array)
    time_max = lag_axis[idx_max]
    plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
    plt.legend()
    plt.xlabel('Time lag (ms)')
    plt.ylabel('R2')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'.png', dpi = 'figure')
    plt.close()


    # Decoded velocity at this round's best lag.
    _, _, vel_df = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field,weights)
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel('x-vel')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'_pred.png', dpi = 'figure')
    plt.close()

    #stack the decoding dimensions to be projected out
    weights = np.vstack((weights,sub_coef_array[idx_max,:,:]))

In [None]:
pred_range = (-100, 120)
label = '_long_'
x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)

# Lag sweep: for every lag keep the cross-validated R2 and the (2 x dim)
# weights of the decoder fit on all selected trials.
curr_r2_array = nans([len(lag_axis)])
curr_coef_array = nans([len(lag_axis),2,dim])
for i, lag in enumerate(lag_axis):
    r2, coef, _ = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag, x_field, y_field)
    curr_r2_array[i] = r2
    curr_coef_array[i,:,:] = coef

# Refit at the best lag to get per-sample predictions for plotting.
idx_max = np.argmax(curr_r2_array)
time_max = lag_axis[idx_max]
_, _, vel_df = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field)

# Decoded velocity at the best lag, one line per trial, coloured by reach direction.
for trial_dir, color in zip(plot_dir, colors):
    cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
    for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
        plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'_pred.png', dpi = 'figure')
plt.close()

# R2 as a function of lag. The window in the title is taken from pred_range;
# the previous hard-coded '[0,120]' did not match the (-100, 120) window used.
plt.plot(lag_axis, curr_r2_array)
plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
plt.legend()
plt.title('R2 score predicting hand velocity [{},{}]'.format(*pred_range))
plt.xlabel('Time lag (ms)')
plt.ylabel('R2')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'.png', dpi = 'figure')
plt.close()

# Iteratively remove the best decoding dimensions: each round repeats the lag
# sweep with the accumulated `weights` projected out of the predictors, then
# adds the new best-lag weights to the set to remove.
# (Loop variable deliberately not called `iter`, which would shadow the builtin
# for the rest of the kernel session.)
weights = curr_coef_array[idx_max,:,:]
for proj_iter in range(0,3):
    #subtract predictions with primary decoding dimensions (at time with max R2)
    sub_coef_array = nans([len(lag_axis),2,dim])
    sub_r2_array = nans([len(lag_axis)])

    for i, lag in enumerate(lag_axis):
        r2, coef,_ = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag,x_field,y_field,weights)
        sub_r2_array[i] = r2
        sub_coef_array[i,:,:] = coef

    plt.plot(lag_axis,sub_r2_array)
    plt.title('R2 score projecting out #'+ str(proj_iter+1) +' t_max dim')
    idx_max = np.argmax(sub_r2_array)
    time_max = lag_axis[idx_max]
    plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
    plt.legend()
    plt.xlabel('Time lag (ms)')
    plt.ylabel('R2')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'.png', dpi = 'figure')
    plt.close()


    # Decoded velocity at this round's best lag.
    _, _, vel_df = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field,weights)
    for trial_dir, color in zip(plot_dir, colors):
        cond_ids = dataset_5ms.trial_info[dataset_5ms.trial_info.cond_dir == trial_dir].trial_id
        for _, trial in vel_df[np.isin(vel_df.trial_id, cond_ids)].groupby('trial_id'):
            plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel('x-vel')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(proj_iter+1) +'_pred.png', dpi = 'figure')
    plt.close()

    #stack the decoding dimensions to be projected out
    weights = np.vstack((weights,sub_coef_array[idx_max,:,:]))

In [None]:
# Lag sweep for the "late" prediction window (380 to 500 relative to
# move_onset_time): first a plain sweep over lag_axis, then three rounds of
# sub_and_predict with the accumulated best-lag weights passed in.
pred_range = (380, 500)
label = '_late_'
x_axis = np.arange(pred_range[0], pred_range[1], dataset_5ms.bin_width)

n_lags = len(lag_axis)
curr_r2_array = nans([n_lags])
curr_coef_array = nans([n_lags,2,dim])
for i, lag in enumerate(lag_axis):
    r2, coef, _ = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag, x_field, y_field)
    curr_r2_array[i] = r2
    curr_coef_array[i,:,:] = coef

# Lag with the highest R2 and the predictions it produces
idx_max = np.argmax(curr_r2_array)
time_max = lag_axis[idx_max]
_, _, vel_df = fit_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field)

# One thin line per trial, coloured by reach direction
for trial_dir, color in zip(plot_dir, colors):
    in_direction = dataset_5ms.trial_info.cond_dir == trial_dir
    cond_ids = dataset_5ms.trial_info[in_direction].trial_id
    direction_trials = vel_df[np.isin(vel_df.trial_id, cond_ids)]
    for _, trial in direction_trials.groupby('trial_id'):
        plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
plt.xlabel('Time (ms)')
plt.ylabel('x-vel')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'_pred.png', dpi = 'figure')
plt.close()

# R2 as a function of lag, with the best lag marked in red
plt.plot(lag_axis, curr_r2_array)
plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
plt.legend()
plt.title('R2 score predicting hand velocity [380,500]')
plt.xlabel('Time lag (ms)')
plt.ylabel('R2')
plt.tight_layout()
plt.savefig(figDir + monkey + label + str(0) +'.png', dpi = 'figure')
plt.close()

weights = curr_coef_array[idx_max,:,:]
for iter in range(0,3):
    #subtract predictions with primary decoding dimensions (at time with max R2)
    sub_r2_array = nans([n_lags])
    sub_coef_array = nans([n_lags,2,dim])
    for i, lag in enumerate(lag_axis):
        r2, coef,_ = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, lag,x_field,y_field,weights)
        sub_r2_array[i] = r2
        sub_coef_array[i,:,:] = coef

    # Best lag of this round
    idx_max = np.argmax(sub_r2_array)
    time_max = lag_axis[idx_max]

    plt.plot(lag_axis,sub_r2_array)
    plt.title('R2 score projecting out #'+ str(iter+1) +' t_max dim')
    plt.axvline(time_max, color = 'r', label='t_max = ' + str(time_max))
    plt.legend()
    plt.xlabel('Time lag (ms)')
    plt.ylabel('R2')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(iter+1) +'.png', dpi = 'figure')
    plt.close()

    # Predicted velocities at this round's best lag
    _, _, vel_df = sub_and_predict(dataset_5ms, trial_mask, 'move_onset_time', pred_range, time_max, x_field, y_field,weights)
    for trial_dir, color in zip(plot_dir, colors):
        in_direction = dataset_5ms.trial_info.cond_dir == trial_dir
        cond_ids = dataset_5ms.trial_info[in_direction].trial_id
        direction_trials = vel_df[np.isin(vel_df.trial_id, cond_ids)]
        for _, trial in direction_trials.groupby('trial_id'):
            plt.plot(x_axis, trial.pred_vel[plot_dim], color=color, linewidth=0.5)
    plt.xlabel('Time (ms)')
    plt.ylabel('x-vel')
    plt.tight_layout()
    plt.savefig(figDir + monkey + label + str(iter+1) +'_pred.png', dpi = 'figure')
    plt.close()

    #stack the decoding dimensions to be projected out
    weights = np.vstack((weights,sub_coef_array[idx_max,:,:]))

# Multi Lags

In [None]:
# Load a second copy of the recording (held-out units not split off) for the
# multi-lag analysis and resample it with an argument of 50; the resulting
# bin width is printed as a check.
dataset_50ms = NWBDataset(filename, split_heldout=False)
dataset_50ms.resample(50)
print(dataset_50ms.bin_width)

In [None]:
# Select the active (non-bump) trials and add a PCA projection of the
# standardised spike counts to dataset_50ms as the continuous field 'PCA'.
n_dims = 20 # for PCA

# Trials with split == 'none' are excluded from both groups.
active_mask = (~dataset_50ms.trial_info.ctr_hold_bump) & (dataset_50ms.trial_info.split != 'none')
passive_mask = (dataset_50ms.trial_info.ctr_hold_bump) & (dataset_50ms.trial_info.split != 'none')


trial_mask = active_mask
n_trials = dataset_50ms.trial_info.loc[trial_mask].shape[0]
print(n_trials,'trials')
n_neurons = dataset_50ms.data.spikes.shape[1]
print(n_neurons,'neurons')

all_data = np.array(dataset_50ms.data.spikes)
print(all_data.shape)
# Rows (time bins) containing any NaN are left out of the fit and stay NaN in the projection.
valid_rows = ~np.isnan(all_data).any(axis=1)
data_for_pca = all_data[valid_rows]
print(data_for_pca.shape)

scaler = StandardScaler()
X = scaler.fit_transform(data_for_pca)
pca = PCA(n_components=n_dims)
# fit() returns the estimator itself, so its result is not assigned back to X.
pca.fit(X)

# Project every valid bin in one call instead of transforming row by row;
# X is already scaler.transform(data_for_pca), so the values are the same.
PCA_data = nans([all_data.shape[0],n_dims])
PCA_data[valid_rows] = pca.transform(X)
print(PCA_data.shape)
dataset_50ms.add_continuous_data(PCA_data,'PCA')
print('PCA total var explained:',sum(pca.explained_variance_ratio_))

In [None]:
# Build per-trial arrays (trial x time bin x feature) for the active trials,
# aligned to move_onset_time over (-300, 700), plus one reach-direction label
# per trial for stratified cross-validation.
active_data = dataset_50ms.make_trial_data(align_field='move_onset_time', align_range=(-300, 700), ignored_trials=~trial_mask)
# Number of time bins per trial, read off the first trial.
for idx, trial in active_data.groupby('trial_id'):
    n_timepoints = trial.shape[0]
    break
print(n_timepoints,'time bins')

active_trials_neuron = nans([n_trials,n_timepoints,n_neurons])
active_trials_vel = nans([n_trials,n_timepoints,2])
active_trials_pca = nans([n_trials,n_timepoints,n_dims])
# groupby iterates trials in ascending trial_id order.
for i, (idx, trial) in enumerate(active_data.groupby('trial_id')):
    active_trials_neuron[i,:,:]=trial.spikes.to_numpy()
    active_trials_vel[i,:,:]=trial.hand_vel.to_numpy()
    active_trials_pca[i,:,:]=trial.PCA.to_numpy()
print(active_trials_neuron.shape)
print(active_trials_vel.shape)
print(active_trials_pca.shape)

#make dictionary for trial condition (reaching directions) for Stratified CV
# NOTE(review): assumes trial_info rows are ordered by trial_id so that these
# labels line up with the groupby order used above - confirm.
active_trials_idx = np.array(dataset_50ms.trial_info.loc[trial_mask]['trial_id'])
cond_dir_idx = []
for direction in [0,45,90,135,180,225,270,315]:
    in_direction = (dataset_50ms.trial_info['cond_dir'] == direction) & (dataset_50ms.trial_info['ctr_hold_bump'] == False) & \
           (dataset_50ms.trial_info['split'] != 'none')
    # Collect trial ids rather than positional row numbers: the lookup below
    # compares against trial_id values, and np.where(...)[0] only agreed with
    # them when trial_id happened to equal the row position.
    cond_dir_idx.append(np.array(dataset_50ms.trial_info.loc[in_direction, 'trial_id']))

# Direction code (0-7) per active trial; trials matching no listed direction stay NaN.
cond_dict = nans([n_trials])
for cond, direction_trial_ids in enumerate(cond_dir_idx):
    cond_dict[np.isin(active_trials_idx, direction_trial_ids)] = cond
print(cond_dict)

## Early

### with Neurons

In [None]:
# Grid search over how much leading (before) and lagging (after) neural data
# is given to a ridge decoder of hand velocity in the early window
# [pred_start, pred_end). For every (time_before, time_after) pair:
#   - lr_all is fit on all trials and its coefficients are stored,
#   - R2 is computed from the pooled held-out predictions of a 5-fold
#     stratified (by reach direction) cross-validation.
data_range = [-300,700]
pred_start = -100
pred_end = 120

# Bin indices of the prediction window inside each trial
idx1 = int((pred_start - data_range[0])/dataset_50ms.bin_width)
idx2 = int(n_timepoints - (data_range[1]-pred_end)/dataset_50ms.bin_width)

# get_spikes_with_history leaves the first bins_before and last bins_after rows
# of each trial as NaN, and Ridge cannot be fit on NaN inputs. The history
# windows therefore must not reach outside the trial data: at most
# pred_start - data_range[0] ms of lead and data_range[1] - pred_end ms of lag.
# (The lead grid previously ran to 300 ms with only 200 ms available.)
max_time_before = pred_start - data_range[0]
max_time_after = data_range[1] - pred_end
t_before_range = range(0,min(300, max_time_before)+1,50)
t_after_range = range(0,min(500, max_time_after)+1,50)

early_multi_R2s = nans([len(t_before_range),len(t_after_range)])
early_multi_coefs = []
for j, time_before in enumerate(t_before_range):
    coef_arr = []
    for k, time_after in enumerate(t_after_range):
        print('Predicting with',-time_before, 'to', time_after,'ms neural data')

        bins_before= int(time_before/dataset_50ms.bin_width) #How many bins of neural data prior to the output are used for decoding
        bins_current= 1 #Whether to use concurrent time bin of neural data
        bins_after= int(time_after/dataset_50ms.bin_width) #How many bins of neural data after the output are used for decoding

        n_total_bins = bins_before + bins_current + bins_after

        # Covariates: for each trial and predicted bin, the flattened
        # (history bin x neuron) spike counts.
        X =  nans([n_trials,idx2-idx1,n_total_bins*n_neurons])
        for i, trial_data in enumerate(active_trials_neuron):
            trial_hist=get_spikes_with_history(trial_data,bins_before,bins_after,bins_current)
            trial_hist = trial_hist[idx1:idx2,:,:]
            trial_hist_flat=trial_hist.reshape(trial_hist.shape[0],(trial_hist.shape[1]*trial_hist.shape[2]))
            X[i,:,:] = trial_hist_flat

        y = active_trials_vel[:,idx1:idx2,:]

        # Fit on all trials (alpha chosen from 1e-4 ... 1e4) to obtain the stored weights
        lr_all = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
        X_reshaped = X.reshape((X.shape[0]*X.shape[1]),X.shape[2])
        y_reshaped = y.reshape((y.shape[0]*y.shape[1]),y.shape[2])
        lr_all.fit(X_reshaped, y_reshaped)
        print(lr_all.best_params_['alpha'])

        # Held-out predictions from every fold are pooled before scoring
        skf = StratifiedKFold(n_splits=5,shuffle=True,random_state = 42)
        true_concat = nans([(n_trials*(idx2-idx1)),2])
        pred_concat = nans([(n_trials*(idx2-idx1)),2])
        trial_save_idx = 0
        for training_set, test_set in skf.split(range(0,n_trials),cond_dict):
            #split training and testing by trials
            X_train, X_test, y_train, y_test = process_train_test(X,y,training_set,test_set)
            lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
            lr.fit(X_train, y_train)
            y_test_predicted = lr.predict(X_test)
            n = y_test_predicted.shape[0]
            true_concat[trial_save_idx:trial_save_idx+n,:] = y_test
            pred_concat[trial_save_idx:trial_save_idx+n,:] = y_test_predicted
            trial_save_idx += n

        sses =get_sses_pred(true_concat,pred_concat)
        sses_mean=get_sses_mean(true_concat)
        early_multi_R2s[j,k] =1-np.sum(sses)/np.sum(sses_mean)
        print('R2:',early_multi_R2s[j,k])
        coef_arr.append(lr_all.best_estimator_.coef_)
    early_multi_coefs.append(coef_arr)

In [None]:
# Heat map of early_multi_R2s: rows follow t_before_range, columns follow
# t_after_range, and every cell is annotated with its R2 truncated to three
# decimals. The figure is saved to the neuron figure directory.
fig, ax = plt.subplots()
im = ax.imshow(early_multi_R2s)
ax.set(xlabel='Length of lagging info', ylabel='Length of leading info')

n_lead, n_lag = len(t_before_range), len(t_after_range)
ax.set_xticks(np.arange(n_lag))
ax.set_yticks(np.arange(n_lead))
ax.set_xticklabels(labels=t_after_range)
ax.set_yticklabels(labels=t_before_range)

ax.set_title("R2 predicting [-100, 120] velocity \nwith different lagging/leading info")
fig.tight_layout()

# i indexes the leading-info rows, j the lagging-info columns
for i, j in np.ndindex(n_lead, n_lag):
    truncated_r2 = int(early_multi_R2s[i, j]*1000)/1000
    text = ax.text(j, i, str(truncated_r2),
                   ha="center", va="center", color="w")

plt.tight_layout()
figDir = "/Users/sherryan/area2_population_analysis/figures/neurons/act/"
plt.savefig(figDir + monkey + '_multi_early.png', dpi = 'figure')
plt.close()

### with PCA

In [None]:
# Lead/lag grid search decoding hand velocity from the PCA features
# (active_trials_pca). idx1, idx2, t_before_range and t_after_range are not
# set here; they come from whichever cell defined them last.
PCA_early_multi_R2s = nans([len(t_before_range),len(t_after_range)])
PCA_early_multi_coefs = []
n_pred_bins = idx2 - idx1
for j, time_before in enumerate(t_before_range):
    coef_arr = []
    for k, time_after in enumerate(t_after_range):
        print('Predicting with',-time_before, 'to', time_after,'ms neural data')

        # Number of history bins before / at / after the predicted bin
        bins_before = int(time_before/dataset_50ms.bin_width)
        bins_current = 1
        bins_after = int(time_after/dataset_50ms.bin_width)
        n_total_bins = bins_before + bins_current + bins_after

        # Covariates: flattened (history bin x PCA dimension) per trial and predicted bin
        X = nans([n_trials,n_pred_bins,n_total_bins*n_dims])
        for i, trial_data in enumerate(active_trials_pca):
            trial_hist = get_spikes_with_history(trial_data,bins_before,bins_after,bins_current)[idx1:idx2,:,:]
            n_rows, n_hist_bins, n_feats = trial_hist.shape
            trial_hist_flat = trial_hist.reshape(n_rows, n_hist_bins*n_feats)
            X[i,:,:] = trial_hist_flat

        y = active_trials_vel[:,idx1:idx2,:]

        # Decoder fit on all trials; its coefficients are what gets stored
        X_reshaped = X.reshape((X.shape[0]*X.shape[1]),X.shape[2])
        y_reshaped = y.reshape((y.shape[0]*y.shape[1]),y.shape[2])
        lr_all = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
        lr_all.fit(X_reshaped, y_reshaped)
        print(lr_all.best_params_['alpha'])

        # 5-fold CV stratified by reach direction; held-out rows are pooled
        skf = StratifiedKFold(n_splits=5,shuffle=True,random_state = 42)
        n_samples = n_trials*n_pred_bins
        true_concat = nans([n_samples,2])
        pred_concat = nans([n_samples,2])
        trial_save_idx = 0
        for training_set, test_set in skf.split(range(0,n_trials),cond_dict):
            #split training and testing by trials
            X_train, X_test, y_train, y_test = process_train_test(X,y,training_set,test_set)
            lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
            lr.fit(X_train, y_train)
            y_test_predicted = lr.predict(X_test)
            n = y_test_predicted.shape[0]
            fold_rows = slice(trial_save_idx, trial_save_idx+n)
            true_concat[fold_rows,:] = y_test
            pred_concat[fold_rows,:] = y_test_predicted
            trial_save_idx += n

        # R2 = 1 - (summed prediction SSE) / (summed SSE around the mean)
        sses = get_sses_pred(true_concat,pred_concat)
        sses_mean = get_sses_mean(true_concat)
        PCA_early_multi_R2s[j,k] = 1-np.sum(sses)/np.sum(sses_mean)
        print('R2:',PCA_early_multi_R2s[j,k])
        coef_arr.append(lr_all.best_estimator_.coef_)
    PCA_early_multi_coefs.append(coef_arr)

In [None]:
# Heat map of PCA_early_multi_R2s (rows: t_before_range, columns:
# t_after_range) with each cell annotated by its R2 truncated to three
# decimals; saved to the PCA figure directory.
fig, ax = plt.subplots()
im = ax.imshow(PCA_early_multi_R2s)
ax.set_xlabel('Length of lagging info')
ax.set_ylabel('Length of leading info')

ax.set_xticks(np.arange(len(t_after_range)))
ax.set_yticks(np.arange(len(t_before_range)))
ax.set_xticklabels(labels=t_after_range)
ax.set_yticklabels(labels=t_before_range)

# The early window is pred_start = -100 to pred_end = 120, the same window as
# the neuron-based early figure; the title previously read "[0, 120]".
ax.set_title("R2 predicting [-100, 120] velocity \nwith different lagging/leading info")
fig.tight_layout()

for i in range(len(t_before_range)):
    for j in range(len(t_after_range)):
        text = ax.text(j, i, str(int(PCA_early_multi_R2s[i, j]*1000)/1000),
                       ha="center", va="center", color="w")
plt.tight_layout()
figDir = "/Users/sherryan/area2_population_analysis/figures/PCA/act/"
plt.savefig(figDir + monkey + '_multi_early.png', dpi = 'figure')
plt.close()

## Whole

### with Neurons

In [None]:
# Lead/lag grid search over the whole window [pred_start, pred_end) using the
# per-neuron spike counts. For every (time_before, time_after) pair a ridge
# decoder is fit on all trials (weights stored) and scored with pooled
# held-out predictions from a stratified 5-fold cross-validation.
data_range = [-300,700]
pred_start = -100
pred_end = 500

bin_ms = dataset_50ms.bin_width
idx1 = int((pred_start - data_range[0])/bin_ms)
idx2 = int(n_timepoints - (data_range[1]-pred_end)/bin_ms)
n_pred_bins = idx2 - idx1

t_before_range = range(0,201,50)
t_after_range = range(0,201,50)

whole_multi_R2s = nans([len(t_before_range),len(t_after_range)])
whole_multi_coefs = []
for j, time_before in enumerate(t_before_range):
    coef_arr = []
    for k, time_after in enumerate(t_after_range):
        print('Predicting with',-time_before, 'to', time_after,'ms neural data')

        # Number of history bins before / at / after the predicted bin
        bins_before = int(time_before/bin_ms)
        bins_current = 1
        bins_after = int(time_after/bin_ms)
        n_total_bins = bins_before + bins_current + bins_after

        # Covariates: flattened (history bin x neuron) per trial and predicted bin
        X = nans([n_trials,n_pred_bins,n_total_bins*n_neurons])
        for i, trial_data in enumerate(active_trials_neuron):
            trial_hist = get_spikes_with_history(trial_data,bins_before,bins_after,bins_current)[idx1:idx2,:,:]
            n_rows, n_hist_bins, n_feats = trial_hist.shape
            trial_hist_flat = trial_hist.reshape(n_rows, n_hist_bins*n_feats)
            X[i,:,:] = trial_hist_flat

        y = active_trials_vel[:,idx1:idx2,:]

        # Decoder fit on all trials; its coefficients are what gets stored
        X_reshaped = X.reshape((X.shape[0]*X.shape[1]),X.shape[2])
        y_reshaped = y.reshape((y.shape[0]*y.shape[1]),y.shape[2])
        lr_all = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
        lr_all.fit(X_reshaped, y_reshaped)
        print(lr_all.best_params_['alpha'])

        # 5-fold CV stratified by reach direction; held-out rows are pooled
        skf = StratifiedKFold(n_splits=5,shuffle=True,random_state = 42)
        n_samples = n_trials*n_pred_bins
        true_concat = nans([n_samples,2])
        pred_concat = nans([n_samples,2])
        trial_save_idx = 0
        for training_set, test_set in skf.split(range(0,n_trials),cond_dict):
            #split training and testing by trials
            X_train, X_test, y_train, y_test = process_train_test(X,y,training_set,test_set)
            lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
            lr.fit(X_train, y_train)
            y_test_predicted = lr.predict(X_test)
            n = y_test_predicted.shape[0]
            fold_rows = slice(trial_save_idx, trial_save_idx+n)
            true_concat[fold_rows,:] = y_test
            pred_concat[fold_rows,:] = y_test_predicted
            trial_save_idx += n

        # R2 = 1 - (summed prediction SSE) / (summed SSE around the mean)
        sses = get_sses_pred(true_concat,pred_concat)
        sses_mean = get_sses_mean(true_concat)
        whole_multi_R2s[j,k] = 1-np.sum(sses)/np.sum(sses_mean)
        print('R2:',whole_multi_R2s[j,k])
        coef_arr.append(lr_all.best_estimator_.coef_)
    whole_multi_coefs.append(coef_arr)

In [None]:
# Heat map of whole_multi_R2s: rows follow t_before_range, columns follow
# t_after_range, each cell annotated with its R2 truncated to three decimals.
fig, ax = plt.subplots()
im = ax.imshow(whole_multi_R2s)
ax.set(xlabel='Length of lagging info', ylabel='Length of leading info')

n_lead, n_lag = len(t_before_range), len(t_after_range)
ax.set_xticks(np.arange(n_lag))
ax.set_yticks(np.arange(n_lead))
ax.set_xticklabels(labels=t_after_range)
ax.set_yticklabels(labels=t_before_range)

ax.set_title("R2 predicting [-100, 500] velocity \nwith different lagging/leading info")
fig.tight_layout()

# i indexes the leading-info rows, j the lagging-info columns
for i, j in np.ndindex(n_lead, n_lag):
    truncated_r2 = int(whole_multi_R2s[i, j]*1000)/1000
    text = ax.text(j, i, str(truncated_r2),
                   ha="center", va="center", color="w")
plt.tight_layout()
figDir = "/Users/sherryan/area2_population_analysis/figures/neurons/act/"
plt.savefig(figDir + monkey + '_multi_whole.png', dpi = 'figure')
plt.close()

In [None]:
# Pairwise angle (degrees) between the per-time-bin weight vectors of the
# decoder fit with the longest lead and lag windows, shown as an annotated
# heat map.
# [0] takes the first output row of the coefficient matrix - presumably the
# x-velocity decoder; confirm against the hand_vel column order.
coef_X = whole_multi_coefs[-1][-1][0] #which entry's weights to use

# One weight vector per history bin; the current bin is shared by the lead and lag grids.
n_weights = len(t_before_range) + len(t_after_range) - 1
# Bin times derived from the lead/lag grids (earliest lead first, e.g.
# -200 ... 0 ... 200) so the tick labels always match n_weights instead of
# relying on a hard-coded arange.
t_label = np.concatenate((-np.array(t_before_range)[:0:-1], np.array(t_after_range)))

coef_X_reshaped = coef_X.reshape(n_weights,n_neurons)
angDist_array = nans([n_weights,n_weights])
for i in range(n_weights):
    for j in range(n_weights):
        angDist_array[i,j] = math.degrees(angle_between(coef_X_reshaped[i,:],coef_X_reshaped[j,:]))
fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(angDist_array)
ax.set_xlabel('Bin time (ms)')
ax.set_ylabel('Bin time (ms)')

ax.set_xticks(np.arange(len(t_label)))
ax.set_yticks(np.arange(len(t_label)))
ax.set_xticklabels(labels=t_label)
ax.set_yticklabels(labels=t_label)

ax.set_title("Angle between weight vectors at time points")
fig.tight_layout()

# Annotate every cell with the angle truncated to whole degrees
for i in range(n_weights):
    for j in range(n_weights):
        text = ax.text(j, i, str(int(angDist_array[i, j])),
                       ha="center", va="center", color="w")
plt.tight_layout()
figDir = "/Users/sherryan/area2_population_analysis/figures/neurons/act/"
plt.savefig(figDir + monkey + '_multi_whole_deg.png', dpi = 'figure')
plt.close()

### with PCA

In [None]:
# Lead/lag grid search over the whole window decoding hand velocity from the
# PCA features (active_trials_pca). idx1, idx2, t_before_range and
# t_after_range are not set here; they come from whichever cell defined them last.
PCA_whole_multi_R2s = nans([len(t_before_range),len(t_after_range)])
PCA_whole_multi_coefs = []
n_pred_bins = idx2 - idx1
for j, time_before in enumerate(t_before_range):
    coef_arr = []
    for k, time_after in enumerate(t_after_range):
        print('Predicting with',-time_before, 'to', time_after,'ms neural data')

        # Number of history bins before / at / after the predicted bin
        bins_before = int(time_before/dataset_50ms.bin_width)
        bins_current = 1
        bins_after = int(time_after/dataset_50ms.bin_width)
        n_total_bins = bins_before + bins_current + bins_after

        # Covariates: flattened (history bin x PCA dimension) per trial and predicted bin
        X = nans([n_trials,n_pred_bins,n_total_bins*n_dims])
        for i, trial_data in enumerate(active_trials_pca):
            trial_hist = get_spikes_with_history(trial_data,bins_before,bins_after,bins_current)[idx1:idx2,:,:]
            n_rows, n_hist_bins, n_feats = trial_hist.shape
            trial_hist_flat = trial_hist.reshape(n_rows, n_hist_bins*n_feats)
            X[i,:,:] = trial_hist_flat

        y = active_trials_vel[:,idx1:idx2,:]

        # Decoder fit on all trials; its coefficients are what gets stored
        X_reshaped = X.reshape((X.shape[0]*X.shape[1]),X.shape[2])
        y_reshaped = y.reshape((y.shape[0]*y.shape[1]),y.shape[2])
        lr_all = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
        lr_all.fit(X_reshaped, y_reshaped)
        print(lr_all.best_params_['alpha'])

        # 5-fold CV stratified by reach direction; held-out rows are pooled
        skf = StratifiedKFold(n_splits=5,shuffle=True,random_state = 42)
        n_samples = n_trials*n_pred_bins
        true_concat = nans([n_samples,2])
        pred_concat = nans([n_samples,2])
        trial_save_idx = 0
        for training_set, test_set in skf.split(range(0,n_trials),cond_dict):
            #split training and testing by trials
            X_train, X_test, y_train, y_test = process_train_test(X,y,training_set,test_set)
            lr = GridSearchCV(Ridge(), {'alpha': np.logspace(-4, 4, 9)})
            lr.fit(X_train, y_train)
            y_test_predicted = lr.predict(X_test)
            n = y_test_predicted.shape[0]
            fold_rows = slice(trial_save_idx, trial_save_idx+n)
            true_concat[fold_rows,:] = y_test
            pred_concat[fold_rows,:] = y_test_predicted
            trial_save_idx += n

        # R2 = 1 - (summed prediction SSE) / (summed SSE around the mean)
        sses = get_sses_pred(true_concat,pred_concat)
        sses_mean = get_sses_mean(true_concat)
        PCA_whole_multi_R2s[j,k] = 1-np.sum(sses)/np.sum(sses_mean)
        print('R2:',PCA_whole_multi_R2s[j,k])
        coef_arr.append(lr_all.best_estimator_.coef_)
    PCA_whole_multi_coefs.append(coef_arr)

In [None]:
# Heat map of PCA_whole_multi_R2s: rows follow t_before_range, columns follow
# t_after_range, each cell annotated with its R2 truncated to three decimals.
fig, ax = plt.subplots()
im = ax.imshow(PCA_whole_multi_R2s)
ax.set(xlabel='Length of lagging info', ylabel='Length of leading info')

n_lead, n_lag = len(t_before_range), len(t_after_range)
ax.set_xticks(np.arange(n_lag))
ax.set_yticks(np.arange(n_lead))
ax.set_xticklabels(labels=t_after_range)
ax.set_yticklabels(labels=t_before_range)

ax.set_title("R2 predicting [-100, 500] velocity \nwith different lagging/leading info")
fig.tight_layout()

# i indexes the leading-info rows, j the lagging-info columns
for i, j in np.ndindex(n_lead, n_lag):
    truncated_r2 = int(PCA_whole_multi_R2s[i, j]*1000)/1000
    text = ax.text(j, i, str(truncated_r2),
                   ha="center", va="center", color="w")
plt.tight_layout()
figDir = "/Users/sherryan/area2_population_analysis/figures/PCA/act/"
plt.savefig(figDir + monkey + '_multi_whole.png', dpi = 'figure')
plt.close()

In [None]:
# Pairwise angle (degrees) between the per-time-bin weight vectors of the
# PCA-based decoder fit with the longest lead and lag windows, shown as an
# annotated heat map.
# [0] takes the first output row of the coefficient matrix - presumably the
# x-velocity decoder; confirm against the hand_vel column order.
coef_X = PCA_whole_multi_coefs[-1][-1][0] #which entry's weights to use

# One weight vector per history bin; the current bin is shared by the lead and lag grids.
n_weights = len(t_before_range) + len(t_after_range) - 1
# Bin times derived from the lead/lag grids (earliest lead first, e.g.
# -200 ... 0 ... 200) so the tick labels always match n_weights instead of
# relying on a hard-coded arange.
t_label = np.concatenate((-np.array(t_before_range)[:0:-1], np.array(t_after_range)))

coef_X_reshaped = coef_X.reshape(n_weights,n_dims)
angDist_array = nans([n_weights,n_weights])
for i in range(n_weights):
    for j in range(n_weights):
        angDist_array[i,j] = math.degrees(angle_between(coef_X_reshaped[i,:],coef_X_reshaped[j,:]))
fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(angDist_array)
ax.set_xlabel('Bin time (ms)')
ax.set_ylabel('Bin time (ms)')

ax.set_xticks(np.arange(len(t_label)))
ax.set_yticks(np.arange(len(t_label)))
ax.set_xticklabels(labels=t_label)
ax.set_yticklabels(labels=t_label)

ax.set_title("Angle between weight vectors at time points")
fig.tight_layout()

# Annotate every cell with the angle truncated to whole degrees
for i in range(n_weights):
    for j in range(n_weights):
        text = ax.text(j, i, str(int(angDist_array[i, j])),
                       ha="center", va="center", color="w")
plt.tight_layout()
figDir = "/Users/sherryan/area2_population_analysis/figures/PCA/act/"
plt.savefig(figDir + monkey + '_multi_whole_deg.png', dpi = 'figure')
plt.close()