In [23]:
import numpy as np
import scipy.io as sio 
from preprocessing_funcs import get_spikes_with_history
import torch.nn as nn
import torch 
import matplotlib.pyplot as plt

verbose = False #set this if you want to print the corr coef at each epoch

import torch.nn.utils.prune as prune


In [31]:
# Project directories, all relative to the notebook's working directory.
import os

# Folder holding the input .mat recordings.
main_path = os.path.join('data')
# Pickled results and saved figures share one output folder.
result_path = figure_path = os.path.join('results', 'ML_DATA')

In [39]:
class ThresholdPruning(prune.BasePruningMethod):
    """Unstructured pruning that keeps only entries whose magnitude exceeds a cut-off."""

    PRUNING_TYPE = "unstructured"

    def __init__(self, threshold):
        # Cut-off compared against the absolute value of each entry in compute_mask.
        self.threshold = threshold

    def compute_mask(self, tensor, default_mask):
        """Return a boolean mask that is True where |tensor| > threshold.

        `default_mask` is accepted for API compatibility but is not consulted here.
        """
        magnitudes = tensor.abs()
        return magnitudes.gt(self.threshold)

In [40]:
class LSTM(nn.Module):
    """Two stacked ``nn.LSTM`` modules followed by a linear read-out.

    Layer sizes are taken from the arrays given to the constructor:
    ``TrainX`` is indexed as (n_samples, seq_len, n_features) and ``TrainY``
    as (n_samples, n_outputs). Only their shapes are read.
    """

    def __init__(self, TrainX, TrainY, n_hidden= 5 ,n_layers = 1, lr=0.001): # no dropout for now
        """Build the layers.

        Args:
            TrainX: array whose shape gives batch size, sequence length and input size.
            TrainY: array whose second dimension gives the output size.
            n_hidden: hidden units of both LSTM modules.
            n_layers: number of stacked layers inside *each* LSTM module.
            lr: stored on the instance; nothing in this class reads it.
        """
        super().__init__()
        self.n_hidden = n_hidden
        self.lr = lr
        self.n_layers = n_layers
        self.input_dim = TrainX.shape[2]
        self.output_dim = TrainY.shape[1]
        self.seq_len = TrainX.shape[1]
        self.batch_size = TrainX.shape[0]

        # Both recurrent modules are time-major (batch_first=False):
        # they expect inputs shaped (seq_len, batch, features).
        self.lstm = nn.LSTM(self.input_dim, self.n_hidden, self.n_layers, batch_first=False)
        self.lstm2 = nn.LSTM(self.n_hidden, self.n_hidden, self.n_layers, batch_first=False)
        # Output layer applied to every time step of the second LSTM's output.
        self.fc = nn.Linear(self.n_hidden, self.output_dim)

    def binarize_weights(self, ind_layer) :
        """Replace the input-to-hidden weights of one LSTM module by their sign, in place.

        Args:
            ind_layer: 0 selects ``self.lstm``; any other value selects ``self.lstm2``.

        Negative entries become -1 and entries >= 0 become +1. Only
        ``weight_ih_l0`` (the first stacked layer) is touched.
        """
        net = self.lstm if ind_layer == 0 else self.lstm2
        weights = net.weight_ih_l0
        with torch.no_grad():
            # Compute both masks before writing so the second test sees the original values.
            negative = weights < 0
            non_negative = weights >= 0
            weights[negative] = -1.
            weights[non_negative] = 1.

    def threshold_pruning(self, threshold=0.075) :
        """Prune weights whose magnitude is not above ``threshold``.

        Applies ``ThresholdPruning`` through ``prune.global_unstructured`` to
        ``weight_ih_l0`` of both LSTM modules and to the weight of the output layer.

        Args:
            threshold: magnitude cut-off; the default keeps the previously
                hard-coded value of 0.075.
        """
        parameters_to_prune = (
            (self.lstm, "weight_ih_l0"),
            (self.lstm2, "weight_ih_l0"),
            (self.fc, "weight"),
        )
        prune.global_unstructured(parameters_to_prune, pruning_method=ThresholdPruning, threshold=threshold)
        print("pruning")

    def forward(self, TrainX, hidden):
        """Run a batch through both LSTM modules and the output layer.

        Args:
            TrainX: tensor shaped (batch, seq_len, input_dim).
            hidden: ``(h0, c0)`` tuple as produced by ``init_hidden``.

        Returns:
            ``(out, hidden)`` where ``out`` is (seq_len, batch, output_dim) and
            ``hidden`` is the final state of the second LSTM module.
        """
        # The LSTMs are time-major, so swap the batch and time axes. Transposing the
        # input keeps its device and dtype and follows its actual sequence length.
        time_major = TrainX.transpose(0, 1).contiguous()
        r_output, hidden = self.lstm(time_major, hidden)
        # The first module's final state seeds the second module.
        r_output, hidden = self.lstm2(r_output, hidden)

        ## put x through the fully-connected layer
        out = self.fc(r_output)
        return out, hidden

    def init_hidden(self, batch_size):
        """Create an initial ``(hidden_state, cell_state)`` pair for ``batch_size`` sequences.

        Both tensors are shaped (n_layers, batch_size, n_hidden). The hidden state
        is all zeros; the cell state is drawn from a standard normal distribution,
        so repeated calls differ unless the torch RNG is seeded.
        """
        # Use the instance's own layer count rather than a notebook-level global.
        hidden_state = torch.zeros(self.n_layers, batch_size, self.n_hidden)
        cell_state = torch.randn(self.n_layers, batch_size, self.n_hidden)
        return (hidden_state, cell_state)

In [41]:
from torch.utils.data import Dataset, DataLoader, random_split

def _last_step_corrcoef(pred, target):
    """Pearson correlation between the flattened last-time-step prediction and the target."""
    flat_pred = pred[-1,:,:].detach().numpy().reshape((-1,))
    flat_target = target.detach().numpy().reshape((-1,))
    return np.corrcoef(flat_pred, flat_target)[0,1]


def train(TrainX, TrainY,TestX, TestY, net, lossfunc, optimizer, num_epoch, clip = 5, finger =0):
    """Full-batch training loop that records correlation curves.

    The first 80% of ``TrainX``/``TrainY`` (in order, no shuffling) is used for
    optimisation and the remaining 20% for validation. Each epoch performs one
    optimiser step on the whole training split.

    Args:
        TrainX, TrainY: numpy arrays with training features and labels.
        TestX, TestY: numpy arrays with held-out features and labels.
        net: model exposing ``init_hidden(batch_size)`` and returning
            ``(output, hidden)`` when called as ``net(x, hidden)``; the output's
            last index along the first axis is scored against the labels.
        lossfunc: callable ``lossfunc(prediction, target)`` returning a scalar tensor.
        optimizer: torch optimiser bound to ``net``'s parameters.
        num_epoch: number of epochs.
        clip: maximum gradient norm passed to ``clip_grad_norm_``.
        finger: unused; kept so existing calls keep working.

    Returns:
        Three lists (train, validation, test) with one correlation coefficient
        per epoch. The train value is computed from the prediction made
        *before* that epoch's optimiser step.
    """
    train_length = int(0.8*len(TrainX))

    # The tensors do not change between epochs, so convert them once.
    x = torch.from_numpy(TrainX[:train_length]).float()
    y = torch.from_numpy(TrainY[:train_length]).float()
    xv = torch.from_numpy(TrainX[train_length:]).float()
    yv = torch.from_numpy(TrainY[train_length:]).float()
    xt = torch.from_numpy(TestX).float()
    yt = torch.from_numpy(TestY).float()

    list_corr_train = []
    list_corr_val = []
    list_corr_test = []

    for epoch in range(num_epoch):
        # Fresh initial states every epoch, created in the original order.
        h = net.init_hidden(x.shape[0])
        hv = net.init_hidden(xv.shape[0])
        ht = net.init_hidden(xt.shape[0])

        # Forward pass and loss on the last time step only.
        pred, h = net(x, h)
        loss = lossfunc(pred[-1,:,:], y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        # gradient clipping - prevents gradient explosion
        nn.utils.clip_grad_norm_(net.parameters(), clip)
        optimizer.step()

        corr_train = _last_step_corrcoef(pred, y)

        # Evaluation only: no autograd graph is needed for these passes.
        with torch.no_grad():
            predv, hv = net(xv, hv)
            predt, ht = net(xt, ht)
        corr_val = _last_step_corrcoef(predv, yv)
        corr_test = _last_step_corrcoef(predt, yt)

        list_corr_train.append(corr_train)
        list_corr_val.append(corr_val)
        list_corr_test.append(corr_test)

        if verbose:
            print ('Epoch [%d/%d], Loss: %.4f' %(epoch+1, num_epoch, loss.item()))
            print ('Correlation coefficient train : {corrcoef}'.format(corrcoef=corr_train))
            print ('Correlation coefficient validation: {corrcoef}'.format(corrcoef=corr_val))
            print ('Correlation coefficient test: {corrcoef}'.format(corrcoef=corr_test))
    return list_corr_train, list_corr_val, list_corr_test
    


In [42]:
# train using correlation coefficient for loss 
def corr_coeff(x, y):
    """Correlation-based loss: 1 minus the Pearson correlation of x and y.

    Both tensors are centred on their overall mean; the result is 0 for
    perfectly correlated inputs and 2 for perfectly anti-correlated ones.
    """
    x_centered = x - x.mean()
    y_centered = y - y.mean()

    covariance = (x_centered * y_centered).mean()
    x_std = x_centered.pow(2).mean().sqrt()
    y_std = y_centered.pow(2).mean().sqrt()

    correlation = covariance / (x_std * y_std)
    return 1 - correlation

In [43]:
# preprocessing 
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Module-level scaler used by preprocessing(); every call re-fits it on that call's X.
scaler = StandardScaler()

def preprocessing(X, testX, Y, testY) :
    """Scale the features with the module-level ``scaler``.

    The scaler is fitted on ``X`` and the same fitted transform is then applied
    to ``testX``. ``Y`` and ``testY`` are returned untouched.

    Returns:
        (scaled X, scaled testX, Y, testY)
    """
    train_scaled = scaler.fit_transform(X)
    test_scaled = scaler.transform(testX)
    return train_scaled, test_scaled, Y, testY

In [None]:
# Pruning thresholds that were explored. This list is not read by the loop below;
# LSTM.threshold_pruning() applies its own threshold.
#list_ = [0.01, 0.05, 0.1, 0.125, 0.15, 0.175]
#list_ = [0.06, 0.07, 0.075, 0.08]
list_ = [0.075]

import pickle


def _final_step_predictions(net, X, Y):
    """Run `net` on the numpy array X and score its last-time-step output against Y.

    Returns (predictions flattened to 1-D, correlation coefficient with flattened Y).
    """
    with torch.no_grad():
        pred, _ = net(torch.from_numpy(X).float(), net.init_hidden(X.shape[0]))
    flat_pred = pred[-1,:,:].detach().numpy().reshape((-1,))
    return flat_pred, np.corrcoef(flat_pred, Y.reshape((-1,)))[0,1]


def _fit_and_report(net, lossfunc, optimizer, n_epochs, TrainX, TrainY, TestX, TestY, finger):
    """Train `net` for n_epochs, then print and return its final train/test predictions.

    Returns (corr_train, corr_val, corr_test, train_predictions, test_predictions),
    where the first three are the per-epoch curves returned by train().
    """
    # Always optimise in training mode, including the retraining that follows pruning.
    net.train()
    corr_train, corr_val, corr_test = train(TrainX, TrainY,TestX,TestY, net, lossfunc, optimizer,n_epochs, clip = 5, finger = finger)
    net.eval()
    train_pred, train_corr = _final_step_predictions(net, TrainX, TrainY)
    print ('Correlation coefficient train : {corrcoef}'.format(corrcoef=train_corr))
    test_pred, test_corr = _final_step_predictions(net, TestX, TestY)
    print ('Correlation coefficient test : {corrcoef}'.format(corrcoef=test_corr))
    return corr_train, corr_val, corr_test, train_pred, test_pred


for Idx_subject in list([10]): # 3 subjects index 10-12
    # One slot per finger (a sixth slot is kept, matching the original result layout).
    prediction = [[],[],[],[],[],[]]
    predictionTest = [[],[],[],[],[],[]]
    model = [[],[],[],[],[],[]]
    all_corrcoef = [[],[],[],[],[],[]]
    all_corrcoef_test = [[],[],[],[],[],[]]
    all_corrcoef_val = [[],[],[],[],[],[]]

    # The recordings are the same for every finger, so read both files once per subject.
    subject_prefix = main_path + '/BCImoreData_Subj_'+str(Idx_subject)
    trainMat = sio.loadmat(subject_prefix + '_200msLMP.mat')
    testMat = sio.loadmat(subject_prefix + '_200msLMPTest.mat')

    for Finger in list([0, 1, 2, 3, 4]): # 5 fingers for each subject. 0:thumb, 1:index, 2:middle ...

        # training data (TrainX: feature vectors, TrainY: labels of the selected finger)
        TrainX = trainMat['Data_Feature'].transpose()
        TrainY = trainMat['SmoothedFinger'][:,Finger]
        TrainY = TrainY.reshape(TrainY.shape[0],1)
        # testing data (TestX: feature vectors, TestY: labels of the selected finger)
        TestX = testMat['Data_Feature'].transpose()
        TestY = testMat['SmoothedFinger'][:,Finger]
        TestY = TestY.reshape(TestY.shape[0],1)

        # preprocessing
        print("preprocessing...")
        TrainX,TestX,TrainY,TestY  = preprocessing(TrainX,TestX,TrainY,TestY)

        # from here, we reconstruct the input by "looking back" a few steps
        bins_before= 20 #How many bins of neural data prior to the output are used for decoding
        bins_current=1 #Whether to use concurrent time bin of neural data
        bins_after=0 #How many bins of neural data after the output are used for decoding

        TrainX=get_spikes_with_history(TrainX,bins_before,bins_after,bins_current)
        # Drop the first bins_before samples from features and labels alike.
        TrainX, TrainY = TrainX[bins_before:,:,:], TrainY[bins_before:,]

        TestX=get_spikes_with_history(TestX,bins_before,bins_after,bins_current)
        TestX, TestY = TestX[bins_before:,:,:], TestY[bins_before:,]

        # Now, we reconstructed TrainX/TestX to have a shape (num_of_samples, sequence_length, input_size)
        # You can fit this to the LSTM

        print("run for finger ", Finger)

        n_hidden = 20
        n_layers = 5
        n_epochs =  50 #CHANGE THIS

        net = LSTM(TrainX, TrainY,  n_hidden, n_layers)

        lossfunc = nn.MSELoss()
        optimizer = torch.optim.Adamax(net.parameters())

        # --- stage 1: train the dense network -------------------------------------
        print("training ")
        corr_train, corr_val, corr_test, prediction[Finger], predictionTest[Finger] = _fit_and_report(
            net, lossfunc, optimizer, n_epochs, TrainX, TrainY, TestX, TestY, Finger)
        all_corrcoef[Finger] = corr_train
        all_corrcoef_test[Finger] = corr_test
        all_corrcoef_val[Finger] = corr_val
        model[Finger] = net

        # --- stage 2: prune, then retrain the remaining weights -------------------
        net.threshold_pruning()

        print("retrain the pruned network ")
        n_epochs =  100
        corr_train, corr_val, corr_test, prediction[Finger], predictionTest[Finger] = _fit_and_report(
            net, lossfunc, optimizer, n_epochs, TrainX, TrainY, TestX, TestY, Finger)
        all_corrcoef[Finger] = corr_train
        all_corrcoef_test[Finger] = corr_test
        all_corrcoef_val[Finger] = corr_val
        model[Finger] = net

    # Make sure the output folder exists before writing into it.
    os.makedirs(result_path, exist_ok=True)

    # The per-epoch correlation curves are currently not persisted:
    #with open(os.path.join(result_path, 'corrcoefNoBin_'+str(n_epochs)+'E_Sbj' + str(Idx_subject)), 'wb') as f:
    #  pickle.dump(all_corrcoef, f) 
    #with open(os.path.join(result_path, 'corrcoefNoBin_val_'+str(n_epochs)+'E_Sbj' + str(Idx_subject)), 'wb') as f:
    #  pickle.dump(all_corrcoef_val, f)    
    #with open(os.path.join(result_path, 'corrcoefNoBin__test_'+str(n_epochs)+'E_Sbj' + str(Idx_subject)), 'wb') as f:
    #  pickle.dump(all_corrcoef_test, f)  
    # File names carry the epoch count of the last (post-pruning) training stage.
    with open(os.path.join(result_path, 'finalPredictionsNoBin_'+str(n_epochs)+'E_Sbj' + str(Idx_subject)), 'wb') as f:
        pickle.dump(prediction, f)
    # NOTE: this pickles the whole module objects, so loading requires the same class definitions.
    with open(os.path.join(result_path, 'finalmodelNoBin_'+str(n_epochs)+'E_Sbj' + str(Idx_subject)), 'wb') as fi:
        pickle.dump(model, fi)
    with open(os.path.join(result_path, 'finaltestPredictionsNoBin_'+str(n_epochs)+'E_Sbj' + str(Idx_subject)), 'wb') as f:
        pickle.dump(predictionTest, f)
        

preprocessing...
run for finger  0
training 


In [None]:
import pandas as pd
# Load a previously saved pickle of test predictions and display it.
# NOTE(review): no cell visible in this notebook writes a file with this name
# ('TestpredictionsPruning_200E_...') - confirm which run produced it.
# pd.read_pickle unpickles arbitrary objects; only open files created by this project.
res_100 = pd.read_pickle('results/ML_DATA/TestpredictionsPruning_200E_Sbj10')
res_100

In [None]:
# Learning curves (correlation coefficient per epoch) of the run WITHOUT binarization,
# for subject 10, first finger entry.
# NOTE: pickle.load can execute code stored in the file; only load files created by this project.
with open(os.path.join(result_path, 'corrcoef_testNoBin_100E_Sbj10'), 'rb') as f:
    all_corrcoef_test = pickle.load(f)
with open(os.path.join(result_path, 'corrcoef_valNoBin_100E_Sbj10'), 'rb') as f:
    all_corrcoef_val = pickle.load(f)
with open(os.path.join(result_path, 'corrcoefNoBin_100E_Sbj10'), 'rb') as f:
    all_corrcoef = pickle.load(f)

# Number of plotted epochs = length of one curve (the outer list holds one curve per finger).
n_plotted_epochs = len(all_corrcoef_test[0])

fig_corrcoef, ax_corrcoef = plt.subplots(figsize=(15,10))
ax_corrcoef.plot( all_corrcoef[0])
ax_corrcoef.plot( all_corrcoef_val[0])
ax_corrcoef.plot( all_corrcoef_test[0])
ax_corrcoef.set_title('Correlation coefficient evaluation for 100 epoch without binarization ')
ax_corrcoef.set_xlabel('Epoch')
ax_corrcoef.set_ylabel('Correlation coefficient')

ax_corrcoef.legend(["Train corrcoef ", "Validation corrcoef ", "Test corrcoef"], loc = 'best')
# The file name now says "without binarization" so it matches the title and no longer
# collides with the figure saved by the binarization cell.
fig_corrcoef.savefig(os.path.join(figure_path, 'Corrcoef '+str(n_plotted_epochs)+' epochs without binarization subj 10 finger 0.png'), format='png')

In [None]:
# Learning curves (correlation coefficient per epoch) of the run WITH binarization,
# for subject 10, first finger entry.
# NOTE: pickle.load can execute code stored in the file; only load files created by this project.
with open(os.path.join(result_path, 'corrcoef_testBin_100E_Sbj10'), 'rb') as f:
    all_corrcoef_test = pickle.load(f)
with open(os.path.join(result_path, 'corrcoef_valBin_100E_Sbj10'), 'rb') as f:
    all_corrcoef_val = pickle.load(f)
with open(os.path.join(result_path, 'corrcoefBin_100E_Sbj10'), 'rb') as f:
    all_corrcoef = pickle.load(f)

# Number of plotted epochs = length of one curve (the outer list holds one curve per finger).
n_plotted_epochs = len(all_corrcoef_test[0])

fig_corrcoef, ax_corrcoef = plt.subplots(figsize=(15,10))
ax_corrcoef.plot( all_corrcoef[0])
ax_corrcoef.plot( all_corrcoef_val[0])
ax_corrcoef.plot( all_corrcoef_test[0])
ax_corrcoef.set_title('Correlation coefficient evaluation for 100 epoch when using binarization (-2..2)')
ax_corrcoef.set_xlabel('Epoch')
ax_corrcoef.set_ylabel('Correlation coefficient')
ax_corrcoef.legend(["Train corrcoef ", "Validation corrcoef ", "Test corrcoef"], loc = 'best')
fig_corrcoef.savefig(os.path.join(figure_path, 'Corrcoef '+str(n_plotted_epochs)+' epochs with binarization subj 10 finger 0.png'), format='png')

In [None]:
# NOTE(review): `ratios` is not assigned in any cell visible in this notebook, so this
# cell raises NameError on a fresh kernel unless it is defined elsewhere - confirm.
print("ratios : ") 
print(ratios)

In [None]:
import torch.nn.utils.prune as prune

# select a pruning technique from pytorch 

# choose the percentage of connections that you would like to prune

# the network has to be pruned and then retrained on the remaining weights so that the accuracy can go up again.

In [77]:
# pruning does work, more and more weight units are set to zero while running. 

# todo : 
- complete table III report
- do a big run on all subjects for threshold 0.075
- clean code 
- merge the branch on GitHub


# done : 
- try with multiple layers 
- find the right percentage of connections to prune 
- see if there is a dimension (channel) that is better for pruning than another 
- try to do our own class with a threshold instead of a percentage of connections
- compute the number of pruned connections for each layer and globally
- try with pruning on the linear layer 
- find the right method of pruning 
- find the right method of pruning 