In [1]:
###### Note: this notebook covers both unsupervised and supervised UMAP.
### Supervised UMAP reference: https://umap-learn.readthedocs.io/en/latest/supervised.html

import sys, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib as jl
import cebra.datasets
from cebra import CEBRA
import scipy.io as sio
from sklearn.linear_model import LinearRegression,LogisticRegression
import sklearn.metrics
import torch

def split_data(neural, continuous_index, train_trial, bins_per_trial=None):
    """Split row-aligned recordings into a leading train part and a trailing test part.

    The cut is made at row ``train_trial * bins_per_trial``; rows before it go to
    the training set and the remaining rows go to the test set. No shuffling is done.

    Parameters
    ----------
    neural : sliceable (e.g. ndarray)
        Samples along the first axis.
    continuous_index : sliceable (e.g. ndarray)
        Labels along the first axis, row-aligned with ``neural``.
    train_trial : int
        Number of leading trials assigned to the training set.
    bins_per_trial : int, optional
        Rows per trial. When omitted, the module-level ``dur`` is looked up at call
        time, which keeps existing three-argument calls working unchanged.

    Returns
    -------
    tuple
        ``(neural_train, neural_test, continuous_index_train, continuous_index_test)``
    """
    if bins_per_trial is None:
        # Resolved lazily so the function may be defined before `dur` is assigned.
        bins_per_trial = dur
    split_idx = train_trial * bins_per_trial
    return (
        neural[:split_idx],
        neural[split_idx:],
        continuous_index[:split_idx],
        continuous_index[split_idx:],
    )
# Rows per trial: the recordings are cut and reshaped in multiples of this value.
dur = 40
# Number of target-direction classes (class index 0..7, i.e. angle // 45).
n_conds = 8
# Target angle in degrees -> class index; -180 and 180 both map to index 4.
angle_to_new_value = {-180: 4,-135: 5,-90: 6,-45: 7,0: 0,45: 1,90: 2,135: 3,180: 4}
directory = "./data/SU_16M1/"
# os.listdir returns names in arbitrary order; sorting makes the processing order
# (and therefore the printed log) the same on every run.
files = sorted(os.listdir(directory))

In [None]:
import umap

# This cell writes its .npz/.pdf outputs into `directory`, next to the recordings.
# On a re-run the listing therefore also contains "Chewie_*" output files, which
# sio.loadmat cannot read; only names carrying the raw-recording tag are loaded.
input_tag = "_neural_con_dis_index"
# False -> unsupervised UMAP. This matches what the earlier version effectively saved,
# because its unsupervised fit overwrote the supervised one right after computing it.
# True  -> the target angle is passed as `y` (supervised UMAP).
use_supervised_umap = False
output_dimension = 2
for file in files:
    if "Chewie" not in file or input_tag not in file:  ###  or "Mihili" in file
        continue
    mat_contents = sio.loadmat(os.path.join(directory, file))
    filename_parts = file.split(input_tag)
    # First 7 and last 6 characters of the name stem form the prefix of every output file.
    session_tag = filename_parts[0][:7] + filename_parts[0][-6:]
    new_filename = session_tag + "_embed_M1.npz"
    file_save = os.path.join(directory, new_filename)
    print(file_save)

    neural = mat_contents['neural_M1']
    continuous_index_2d = mat_contents['continuous_index']*10
    discrete_index = mat_contents['discrete_index']

    # Map raw angles to class indices, then back to angles as multiples of 45
    # in the third label column.
    vectorized_map = np.vectorize(lambda x: angle_to_new_value[x])
    discrete_index = vectorized_map(discrete_index)
    continuous_index = np.hstack((continuous_index_2d, discrete_index*45))
    total_trial = discrete_index.shape[0] // dur
    train_trial = int(total_trial*0.8)
    test_trial = total_trial-train_trial

    neural_train, neural_test, continuous_index_train, continuous_index_test = split_data(
        neural, continuous_index, train_trial)
    target_angle_train = continuous_index_train[:, 2].copy()
    target_angle_test = continuous_index_test[:, 2].copy()

    cebra_veldir_model = umap.UMAP(n_neighbors=68, min_dist=0.2475, n_components=output_dimension,
                                   n_jobs=8, random_state=None, metric='euclidean')
    # Fit once on the training rows only. The held-out rows are projected with
    # transform(), so train and test embeddings share one space and no test labels
    # take part in fitting.
    umap_labels = target_angle_train if use_supervised_umap else None
    cebra_veldir_train = cebra_veldir_model.fit_transform(neural_train, y=umap_labels)
    cebra_veldir_test = cebra_veldir_model.transform(neural_test)

    # Ground-truth trajectory: cumulative sum of the first two label columns within each trial.
    velocity_reshaped = continuous_index_train[:, 0:2].reshape(train_trial, dur, 2)
    locations = np.cumsum(velocity_reshaped, axis=1)
    truth_XY = locations.reshape(train_trial*dur, 2)

    # Linear / logistic decoders are fitted and scored on the training embedding itself
    # (no held-out evaluation happens in this cell).
    X = cebra_veldir_train
    y = continuous_index_train[:, 0:2]
    reg = LinearRegression().fit(X, y)
    pred_vel = reg.predict(X)
    y_C = continuous_index_train[:, 2]
    LogisticReg = LogisticRegression(max_iter=500, multi_class='multinomial', solver='lbfgs')
    LogisticReg.fit(X, y_C)
    pred_dir = LogisticReg.predict(X)

    velocity_reshaped = pred_vel.reshape(train_trial, dur, 2)
    locations = np.cumsum(velocity_reshaped, axis=1)
    pred_XY = locations.reshape(train_trial*dur, 2)

    posi_r2 = sklearn.metrics.r2_score(truth_XY, pred_XY) ### proportion of total variation explained by model
    vel_r2 = sklearn.metrics.r2_score(continuous_index_train[:, 0:2], pred_vel)

    # Angular error wrapped to [0, 180] degrees.
    differences = np.abs(pred_dir - target_angle_train)
    angle_diffs = np.where(differences > 180, 360 - differences, differences)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
    ax1.scatter(truth_XY[:, 0], truth_XY[:, 1], alpha=1, color=plt.cm.hsv(1/360*target_angle_train), s=0.3)
    ax1.spines["right"].set_visible(False)
    ax1.spines["top"].set_visible(False)
    ax1.set_title('Var-R2 vel='+str(round(reg.score(X, y), 3))
                  +' dir='+str(round(LogisticReg.score(X, y_C), 3))
                  +' MAE='+str(round(np.mean(angle_diffs),1)))

    ax2.scatter(pred_XY[:, 0], pred_XY[:, 1], alpha=1, color=plt.cm.hsv(1/360*pred_dir), s=0.3)
    ax2.spines["right"].set_visible(False)
    ax2.spines["top"].set_visible(False)
    ax2.set_title('True vs Pred-R2 vel='+str(round(vel_r2, 3))+' pos='+str(round(posi_r2, 3)))
    new_filename = session_tag + "_Decoding.pdf"
    output_path = os.path.join(directory, new_filename)
    plt.savefig(output_path)
    plt.close(fig)

    idx1, idx2 = 0, 1
    fig, (ax_points, ax_avg) = plt.subplots(1, 2, figsize=(10, 5), dpi=250)
    norm = plt.Normalize(vmin=0, vmax=1) ### Ensures full range from 0 to 1 is used ****NECESSARY
    ax_points.scatter(cebra_veldir_train[:, idx1], cebra_veldir_train[:, idx2], c=target_angle_train/360,
                      cmap=plt.cm.hsv, edgecolors='none', norm=norm, alpha=0.75, s=5)
    ax_points.grid(False)
    ax_points.set_xticks([])
    ax_points.set_yticks([])
    xlims = ax_points.get_xlim()
    ylims = ax_points.get_ylim()

    # Right panel: per-direction trajectory averaged over trials, drawn on the same
    # axis limits as the left panel.
    for i in range(n_conds):
        direction_trial = (target_angle_train//45 == i)
        trial_avg = cebra_veldir_train[direction_trial, :].reshape(-1, dur, output_dimension).mean(axis=0)
        cond_color = plt.cm.hsv(i / n_conds)
        ax_avg.scatter(trial_avg[:, idx1], trial_avg[:, idx2],
                       color=cond_color, edgecolors='none', alpha=0.75, s=10)
        ax_avg.plot(trial_avg[:, idx1], trial_avg[:, idx2],
                    color=cond_color, linewidth=0.5, alpha=0.75)
    ax_avg.grid(False)
    ax_avg.set_xticks([])
    ax_avg.set_yticks([])
    ax_avg.set_xlim(xlims)
    ax_avg.set_ylim(ylims)
    new_filename = session_tag + "_Embedding.pdf"
    output_path = os.path.join(directory, new_filename)
    plt.savefig(output_path)
    plt.close(fig)

    np.savez(file_save,
             cebra_veldir_train=cebra_veldir_train,
             cebra_veldir_test=cebra_veldir_test,
             continuous_index_train=continuous_index_train,
             continuous_index_test=continuous_index_test)

In [9]:
from sklearn.decomposition import PCA
### it requires 8 different target angles for alignment(averaging over trials)
### from sklearn.preprocessing import StandardScaler >>> unnecessary

# This cell writes its .npz/.pdf outputs into `directory`, next to the recordings.
# On a re-run the listing therefore also contains "Chewie_*" output files, which
# sio.loadmat cannot read; only names carrying the raw-recording tag are loaded.
input_tag = "_neural_con_dis_index"
output_dimension = 3
for file in files:
    if "Chewie" not in file or input_tag not in file:  ###  or "Mihili" in file
        continue
    mat_contents = sio.loadmat(os.path.join(directory, file))
    filename_parts = file.split(input_tag)
    # First 7 and last 6 characters of the name stem form the prefix of every output file.
    session_tag = filename_parts[0][:7] + filename_parts[0][-6:]
    new_filename = session_tag + "_embed_M1.npz"
    file_save = os.path.join(directory, new_filename)
    print(file_save)

    neural = mat_contents['neural_M1']
    continuous_index_2d = mat_contents['continuous_index']*10
    discrete_index = mat_contents['discrete_index']

    # Map raw angles to class indices, then back to angles as multiples of 45
    # in the third label column.
    vectorized_map = np.vectorize(lambda x: angle_to_new_value[x])
    discrete_index = vectorized_map(discrete_index)
    continuous_index = np.hstack((continuous_index_2d, discrete_index*45))
    total_trial = discrete_index.shape[0] // dur
    train_trial = int(total_trial*0.8)
    test_trial = total_trial-train_trial

    neural_train, neural_test, continuous_index_train, continuous_index_test = split_data(
        neural, continuous_index, train_trial)
    target_angle_train = continuous_index_train[:, 2].copy()
    target_angle_test = continuous_index_test[:, 2].copy()

    n_neurons = neural_train.shape[1]

    # Per-sample PCA, fitted on the training rows in their original order so that row k
    # of the embedding corresponds to target_angle_train[k] (the scatter colours below
    # rely on that alignment). Regrouping rows by direction before fitting, as was done
    # previously, only permutes the rows and breaks that alignment.
    pca_single = PCA(n_components=output_dimension)
    pca_emb_train = pca_single.fit_transform(neural_train)
    # Held-out rows are projected with the train-fitted components instead of a refit.
    pca_emb_test = pca_single.transform(neural_test)

    fig = plt.figure(figsize=(10, 5), dpi=250)
    ax = fig.add_subplot(121, projection='3d')
    # Fixed 0..1 normalisation: without it the colour scale is stretched to the data
    # range and the smallest and largest angle end up with the same hue.
    norm = plt.Normalize(vmin=0, vmax=1)
    ax.scatter(pca_emb_train[:, 0], pca_emb_train[:, 1], pca_emb_train[:, 2],
               c=target_angle_train/360, cmap=plt.cm.hsv, norm=norm,
               edgecolors='none', alpha=0.75, s=1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_zticks([])
    ax.grid(False)

    # Right panel: PCA of the trial-averaged response, one (dur, n_neurons) block per direction.
    ax = fig.add_subplot(122, projection='3d')
    rates = [
        neural_train[target_angle_train//45 == i, :].reshape(-1, dur, n_neurons).mean(axis=0)
        for i in range(n_conds)
    ]
    ### rates = StandardScaler().fit_transform(rates) >>>Fail, only for 1 or 2 dimensions
    rate_stack_avg = np.vstack(rates) ## 2D(n_conds*dur, n_neurons)
    pca = PCA(n_components=output_dimension)
    pca_emb_avg = pca.fit_transform(rate_stack_avg) ### 2D(n_conds*dur, output_dimension)
    pca_emb_avg = pca_emb_avg.reshape((n_conds, dur, -1)) ### 3D(n_conds, dur, output_dimension)

    for i, traj in enumerate(pca_emb_avg): ### traj==(dur, output_dimension)
        cond_color = plt.cm.hsv(i / n_conds)
        ax.plot(traj[:, 0], traj[:, 1], traj[:, 2], color=cond_color, linewidth=0.25)
        ax.scatter(traj[:, 0], traj[:, 1], traj[:, 2], color=cond_color, s=3)
        ## ax.scatter(traj[0, 0], traj[0, 1], traj[0, 2],s=3) ### starting point

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_zticks([])
    ax.grid(False)

    new_filename = session_tag + "_Embedding_PCA.pdf"
    output_path = os.path.join(directory, new_filename)
    plt.savefig(output_path)
    plt.close(fig)

    # Saved under the same file name and key as the other embedding cells; only the
    # trial-averaged embedding is stored.
    np.savez(file_save,
             cebra_veldir_train=pca_emb_avg,
             continuous_index_train=continuous_index_train)

./data/Chewie_150319_embed_M1.npz
./data/Chewie_150629_embed_M1.npz
./data/Chewie_150313_embed_M1.npz
./data/Chewie_150630_embed_M1.npz


In [11]:
from dPCA import dPCA
### it requires 8 different target angles for alignment

# This cell writes its .npz/.pdf outputs into `directory`, next to the recordings.
# On a re-run the listing therefore also contains "Chewie_*" output files, which
# sio.loadmat cannot read; only names carrying the raw-recording tag are loaded.
input_tag = "_neural_con_dis_index"
output_dimension = 3
for file in files:
    if "Chewie" not in file or input_tag not in file:  ###  or "Mihili" in file
        continue
    mat_contents = sio.loadmat(os.path.join(directory, file))
    filename_parts = file.split(input_tag)
    ### new_filename = filename_parts[0] + "_embed_M1.npz"
    # First 7 and last 6 characters of the name stem form the prefix of every output file.
    session_tag = filename_parts[0][:7] + filename_parts[0][-6:]
    new_filename = session_tag + "_embed_M1.npz"
    file_save = os.path.join(directory, new_filename)
    print(file_save)

    neural = mat_contents['neural_M1']
    continuous_index_2d = mat_contents['continuous_index']*10
    discrete_index = mat_contents['discrete_index']

    # Map raw angles to class indices, then back to angles as multiples of 45
    # in the third label column.
    vectorized_map = np.vectorize(lambda x: angle_to_new_value[x])
    discrete_index = vectorized_map(discrete_index)
    continuous_index = np.hstack((continuous_index_2d, discrete_index*45))
    total_trial = discrete_index.shape[0] // dur
    train_trial = int(total_trial*0.8)
    test_trial = total_trial-train_trial

    neural_train, neural_test, continuous_index_train, continuous_index_test = split_data(
        neural, continuous_index, train_trial)
    target_angle_train = continuous_index_train[:, 2].copy()
    target_angle_test = continuous_index_test[:, 2].copy()

    n_neurons = neural_train.shape[1]
    # One (trials, dur, n_neurons) block per direction, computed once and reused for
    # both the trial count and the truncation below.
    trials_by_cond = [
        neural_train[target_angle_train//45 == i, :].reshape(-1, dur, n_neurons)
        for i in range(n_conds)
    ]
    # Keep the same number of trials for every direction so the blocks can be stacked.
    min_trials = min(cond_block.shape[0] for cond_block in trials_by_cond)
    rates_trial = [cond_block[:min_trials, :, :] for cond_block in trials_by_cond]
    rate_stack_avg = np.stack(rates_trial, axis=-1)   ## (min_trials, dur, n_neurons, n_conds)
    trialR = np.transpose(rate_stack_avg, (0, 2, 3, 1)) ## (min_trials, n_neurons, n_conds, dur)

    ### trial-average data over axis 0
    R = np.mean(trialR, 0)
    ### center: subtract each neuron's mean over all directions and bins
    R -= np.mean(R.reshape((n_neurons, -1)), 1)[:, None, None]
    dpca = dPCA.dPCA(labels='st', regularizer='auto', n_components=output_dimension) ###default component is 10
    dpca.protect = ['t']
    # R      : (n_neurons, n_conds, dur)
    # trialR : (min_trials, n_neurons, n_conds, dur)
    # Z[key] : (output_dimension, n_conds, dur) for key in 's', 't', 'st'
    Z = dpca.fit_transform(R, trialR)

    time = range(dur)
    fig, axes = plt.subplots(1, 3, figsize=(10, 4))
    panels = (('t', '1st time component'),
              ('s', '1st stimulus component'),
              ('st', '1st mixing component'))
    for ax, (marginal, panel_title) in zip(axes, panels):
        for s in range(n_conds):
            ax.plot(time, Z[marginal][0, s])
        ax.set_title(panel_title)
    new_filename = session_tag + "_time_stimuli_mix_component.pdf"
    output_path = os.path.join(directory, new_filename)
    plt.savefig(output_path)
    plt.close(fig)

    dPCA_embed = Z['s'] ### (output_dimension, n_conds, dur)
    fig = plt.figure(figsize=(9, 6))
    ax = fig.add_subplot(111, projection='3d')
    for i in range(n_conds):
        cond_color = plt.cm.hsv(i / n_conds)
        ax.scatter(dPCA_embed[0, i, :], dPCA_embed[1, i, :], dPCA_embed[2, i, :],
                   color=cond_color, edgecolors='none', alpha=1, s=10)
        ax.plot(dPCA_embed[0, i, :], dPCA_embed[1, i, :], dPCA_embed[2, i, :],
                color=cond_color, linewidth=0.25, alpha=1)
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_zticks([])
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    new_filename = session_tag + "_Embedding_dPCA.pdf"
    output_path = os.path.join(directory, new_filename)
    plt.savefig(output_path)
    plt.close(fig)

    # Saved under the same file name and key as the other embedding cells.
    np.savez(file_save,
             cebra_veldir_train=dPCA_embed,
             continuous_index_train=continuous_index_train)

./data/Chewie_150319_embed_M1.npz
You chose to determine the regularization parameter automatically. This can
                    take substantial time and grows linearly with the number of crossvalidation
                    folds. The latter can be set by changing self.n_trials (default = 3). Similarly,
                    use self.protect to set the list of axes that are not supposed to get to get shuffled
                    (e.g. upon splitting the data into test- and training, time-points should always
                    be drawn from the same trial, i.e. self.protect = ['t']). This can significantly
                    speed up the code.
Start optimizing regularization.
Starting trial  1 / 3
Starting trial  2 / 3
Starting trial  3 / 3
Optimized regularization, optimal lambda =  0.006640873038344959
Regularization will be fixed; to compute the optimal                    parameter again on the next fit, please                    set opt_regularizer_flag to True.
./data/Chewie_150