<a href="https://colab.research.google.com/github/sazio/NMAs/blob/main/src/UMAP%2BClustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# UMAP + Clustering on Raw Data vs RNNs Hidden Representations
@authors: Simone Azeglio, Chetan Dhulipalla , Khalid Saifullah 


Part of the code here has been taken from [Neuromatch Academy's Computational Neuroscience Course](https://compneuro.neuromatch.io/projects/neurons/README.html), and specifically from [this notebook](https://colab.research.google.com/github/NeuromatchAcademy/course-content/blob/master/projects/neurons/load_stringer_spontaneous.ipynb)

## Loading of Stringer spontaneous data



In [1]:
#@title Data retrieval
import os, requests

fname = "stringer_spontaneous.npy"
url = "https://osf.io/dpqaj/download"

if not os.path.isfile(fname):
    try:
        r = requests.get(url)
    except requests.ConnectionError:
        print("!!! Failed to download data !!!")
    else:
        if r.status_code != requests.codes.ok:
            print("!!! Failed to download data !!!")
        else:
            with open(fname, "wb") as fid:
                fid.write(r.content)

In [2]:
#@title Import matplotlib and set defaults
from matplotlib import rcParams 
from matplotlib import pyplot as plt
rcParams['figure.figsize'] = [20, 4]
rcParams['font.size'] =15
rcParams['axes.spines.top'] = False
rcParams['axes.spines.right'] = False
rcParams['figure.autolayout'] = True

## Exploratory Data Analysis (EDA)

In [3]:
#@title Data loading
import numpy as np
dat = np.load('stringer_spontaneous.npy', allow_pickle=True).item()
print(dat.keys())

dict_keys(['sresp', 'run', 'beh_svd_time', 'beh_svd_mask', 'stat', 'pupilArea', 'pupilCOM', 'xyz'])


In [4]:
# functions 

def moving_avg(array, factor = 5):
    """Reducing the number of compontents by averaging of N = factor
    subsequent elements of array"""
    zeros_ = np.zeros((array.shape[0], 2))
    array = np.hstack((array, zeros_))

    array = np.reshape(array, (array.shape[0],  int(array.shape[1]/factor), factor))
    array = np.mean(array, axis = 2)

    return array

## Extracting Data for RNN (or LFADS)
The first problem to address is that for each layer we don't have the exact same number of neurons. We'd like to have a single RNN encoding all the different layers activities, to make it easier we can take the number of neurons ($N_{neurons} = 1131$ of the least represented class (layer) and level out each remaining class. 

In [None]:
# Extract labels from z - coordinate
from sklearn import preprocessing
x, y, z = dat['xyz']

le = preprocessing.LabelEncoder()
labels = le.fit_transform(z)
### least represented class (layer with less neurons)
n_samples = np.histogram(labels, bins=9)[0][-1]

In [None]:
### Data for LFADS / RNN 
import pandas as pd 
dataSet = pd.DataFrame(dat["sresp"])
dataSet["label"] = labels 

In [None]:
# it can be done in one loop ... 
data_ = []

for i in range(0, 9):
    data_.append(dataSet[dataSet["label"] == i].sample(n = n_samples).iloc[:,:-1].values)

dataRNN = np.zeros((n_samples*9, dataSet.shape[1]-1))
for i in range(0,9):
    
    # dataRNN[n_samples*i:n_samples*(i+1), :] = data_[i]
    ## normalized by layer
    dataRNN[n_samples*i:n_samples*(i+1), :] = data_[i]/np.mean(np.asarray(data_)[i,:,:], axis = 0)

## shuffling for training purposes

#np.random.shuffle(dataRNN)

In [None]:
# downsampling and averaging 
#avgd_normed_dataRNN = dataRNN#
avgd_normed_dataRNN = moving_avg(dataRNN, factor=2)

In [None]:
plt.plot(avgd_normed_dataRNN[0,:])

##  UMAP Visualization (Semi-Supervised) on Raw Data
The result is not encouraging, let's see if the RNN has captured some meaningful dynamics and UMAP can extract it in 2D.

In [None]:
!pip install umap-learn hdbscan --quiet

In [None]:
# Dimension reduction and clustering libraries
import umap
#import hdbscan
import sklearn.cluster as cluster
from sklearn.model_selection import train_test_split

In [None]:
b = np.ones(1131)
labels = np.hstack((0*b,b, 2*b, 3*b,4*b, 5*b, 6*b, 7*b, 8*b))

In [None]:
X_train,  X_test, y_train, y_test = train_test_split(avgd_normed_dataRNN, labels, test_size = 0.10, random_state = 2021)

In [None]:
#factor = 6 
emb = umap.UMAP(random_state = 2021, n_components=2, n_neighbors= 45, min_dist = 0.3).fit(X_train, y = y_train)

In [None]:
test_embedding = emb.transform(X_test)

In [None]:
fig = plt.figure(figsize = (15, 8))
#ax = fig.add_subplot(projection='3d')
#plt.scatter(emb[:1131*factor,0], emb[:1131*factor,1], emb[:1131*factor, 2],  c = labels ,cmap = 'Spectral')
plt.scatter(*emb.embedding_.T,  c = y_train ,cmap = 'Spectral')
cbar = plt.colorbar(boundaries=np.arange(9))
cbar.set_ticks(np.arange(9))
cbar.set_ticklabels(["0", "1", "2", "3", "4", "5", "6", "7", "8"])

In [None]:
fig = plt.figure(figsize = (15, 8))
#ax = fig.add_subplot(projection='3d')
#plt.scatter(emb[:1131*factor,0], emb[:1131*factor,1], emb[:1131*factor, 2],  c = labels ,cmap = 'Spectral')
plt.scatter(*test_embedding.T,  c = y_test ,cmap = 'Spectral')
cbar = plt.colorbar(boundaries=np.arange(9))
cbar.set_ticks(np.arange(9))
cbar.set_ticklabels(["0", "1", "2", "3", "4", "5", "6", "7", "8"])

## UMAP on RNN's Reduced dynamics

### Extract Dynamics


In [5]:
# Extract labels from z - coordinate
from sklearn import preprocessing
x, y, z = dat['xyz']

le = preprocessing.LabelEncoder()
labels = le.fit_transform(z)
### least represented class (layer with less neurons)
n_samples = np.histogram(labels, bins=9)[0][-1]

In [6]:
### Data for LFADS / RNN 
import pandas as pd 
dataSet = pd.DataFrame(dat["sresp"])
dataSet["label"] = labels 

In [7]:
# it can be done in one loop ... 
data_ = []
for i in range(0, 9):
    data_.append(dataSet[dataSet["label"] == i].sample(n = n_samples).iloc[:,:-1].values)

dataRNN = np.zeros((n_samples, dataSet.shape[1]-1, 9))
for i in range(0,9):
    
    # dataRNN[n_samples*i:n_samples*(i+1), :] = data_[i]
    ## normalized by layer
    dataRNN[:, :, i] = data_[i]/np.mean(np.asarray(data_)[i,:,:], axis = 0)


In [8]:
# functions 

def moving_avg(array, factor = 2):
    """Reducing the number of compontents by averaging of N = factor
    subsequent elements of array"""
    #zeros_ = np.zeros((array.shape[0], 2))
    #array = np.hstack((array, zeros_))
    
    array = np.reshape(array, (array.shape[0],  int(array.shape[1]/factor), factor, array.shape[2]))
    array = np.mean(array, axis = 2)

    return array

In [9]:
# downsampling and averaging 
avgd_normed_dataRNN = moving_avg(dataRNN, factor=2)

In [10]:
#!pip3 install torch --upgrade

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.cuda.empty_cache()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.__version__) # should be 1.9.0 for training (Mish Activation)

1.9.0+cu102


In [12]:
# set the seed
np.random.seed(42)

# number of neurons 
NN = dataRNN.shape[0]

In [13]:
frac = 4/5
#n_neurs = 1131
# let's use n_neurs/10 latent components
ncomp = 75 #int(n_neurs/10)

x1_train = torch.from_numpy(avgd_normed_dataRNN[:,:int(frac*avgd_normed_dataRNN.shape[1]), : ]).to(device).float()
x2_train = torch.from_numpy(avgd_normed_dataRNN[:,:int(frac*avgd_normed_dataRNN.shape[1]),: ]).to(device).float()

x1_valid = torch.from_numpy(avgd_normed_dataRNN[:,int(frac*avgd_normed_dataRNN.shape[1]):, :] ).to(device).float()
x2_valid = torch.from_numpy(avgd_normed_dataRNN[:,int(frac*avgd_normed_dataRNN.shape[1]):, :]).to(device).float()

NN1 = x1_train.shape[0]
NN2 = x2_train.shape[0]


In [47]:
class Net(nn.Module):
    def __init__(self, ncomp, NN1, NN2, bidi=True):
        super(Net, self).__init__()

        # play with some of the options in the RNN!
        
        self.rnn = nn.LSTM(NN1, ncomp, num_layers = 1, dropout = 0.3,
                         bidirectional = bidi)
        """
        self.rnn = nn.RNN(NN1, ncomp, num_layers = 1, dropout = 0,
                    bidirectional = bidi, nonlinearity = 'tanh')
        self.rnn = nn.GRU(NN1, ncomp, num_layers = 1, dropout = 0,
                         bidirectional = bidi)
        """
        
        self.mlp = nn.Sequential(
                    nn.Linear(ncomp, ncomp*2),
                    nn.Mish(),
                    nn.Linear(ncomp*2, ncomp*2),
                    nn.Mish(),
                    nn.Dropout(0.25),
                    nn.Linear(ncomp*2, ncomp), 
                    nn.Mish())
        
        self.fc = nn.Linear(ncomp, NN2)

    def forward(self, x):
        x = x.permute(1, 2, 0)
        #print(x.shape)
        # h_0 = torch.zeros(2, x.size()[1], self.ncomp).to(device)
        
        y, h_n = self.rnn(x)

        #print(y.shape)
        #print(h_n.shape)
        if self.rnn.bidirectional:
          # if the rnn is bidirectional, it concatenates the activations from the forward and backward pass
          # we want to add them instead, so as to enforce the latents to match between the forward and backward pass
            q = (y[:, :, :ncomp] + y[:, :, ncomp:])/2
        else:
            q = y
        
        q = self.mlp(q)

        # the softplus function is just like a relu but it's smoothed out so we can't predict 0
        # if we predict 0 and there was a spike, that's an instant Inf in the Poisson log-likelihood which leads to failure
        #z = F.softplus(self.fc(q), 10)
        #print(q.shape)
        z = self.fc(q).permute(2, 0, 1)
        # print(z.shape)
        return z, q

In [48]:
# we initialize the neural network
net = Net(ncomp, NN1, NN2, bidi = True).to(device)

# we set up the optimizer. Adjust the learning rate if the training is slow or if it explodes.
# optimizer1 = torch.optim.Adam(net.parameters(), lr= 0.0003795, weight_decay= 10e-6)

  "num_layers={}".format(dropout, num_layers))


In [49]:
# forward check 
# net(x1)
net(x1_train)

(tensor([[[ 1.2995e-01,  1.1363e-01,  1.1584e-01,  ...,  1.1691e-01,
            1.2452e-01,  1.2659e-01],
          [ 1.2849e-01,  1.0682e-01,  1.1480e-01,  ...,  1.1263e-01,
            1.2770e-01,  1.3833e-01],
          [ 1.2913e-01,  1.2242e-01,  1.1338e-01,  ...,  1.2603e-01,
            1.2066e-01,  1.3443e-01],
          ...,
          [ 1.3281e-01,  1.1721e-01,  1.2239e-01,  ...,  1.4201e-01,
            1.2252e-01,  1.3039e-01],
          [ 1.1495e-01,  1.2263e-01,  1.1979e-01,  ...,  1.2739e-01,
            1.2342e-01,  1.3211e-01],
          [ 1.2958e-01,  1.2760e-01,  1.2604e-01,  ...,  1.2060e-01,
            1.1678e-01,  1.3745e-01]],
 
         [[-9.4108e-04,  2.5550e-03,  1.0541e-02,  ...,  6.9388e-03,
            1.0219e-02,  1.5206e-02],
          [ 3.5089e-03,  1.9745e-03,  8.2637e-03,  ...,  1.8807e-03,
            8.6694e-03,  1.5735e-02],
          [ 7.1012e-03, -5.5105e-03,  2.0005e-03,  ...,  5.7611e-03,
            1.6753e-02,  1.9461e-02],
          ...,
    

In [30]:
from tqdm.notebook import tqdm

In [None]:
# For kaggle directory
#import os 
#os.chdir("../input/neuralmanifoldanimals")
from sam import SAM

In [None]:
base_optimizer = torch.optim.Adam  # define an optimizer for the "sharpness-aware" update
optimizer = SAM(net.parameters(), base_optimizer, lr=0.000974, weight_decay = 0.00001)#, momentum=0.9)

# you can keep re-running this cell if you think the cost might decrease further

cost = nn.MSELoss()

train_save = []
valid_save = []

niter = 50000 #+30000 # 
# rnn_loss = 0.2372, lstm_loss = 0.2340, gru_lstm = 0.2370
for k in tqdm(range(niter)):
    net.train()
    # the network outputs the single-neuron prediction and the latents
    z, y = net(x1_train)

    # our cost
    loss = cost(z, x2_train)

    # train the network as usual
    loss.backward()
    optimizer.first_step(zero_grad = True)
    cost(net(x1_train)[0],x2_train).backward()
    
    optimizer.second_step(zero_grad=True)
    
    """ """;
    # this can be run in a first phase, before wandb finetuning
    with torch.no_grad():
        net.eval()
        train_save.append(loss.item())
        valid_loss = cost(net(x1_valid)[0], x2_valid)
        valid_save.append(valid_loss.item())
   

    if k % 50 == 0:
        with torch.no_grad():
            net.eval()
            valid_loss = cost(net(x1_valid)[0], x2_valid)
            
            print(f' iteration {k}, train cost {loss.item():.4f}, valid cost {valid_loss.item():.4f}')

  0%|          | 0/50000 [00:00<?, ?it/s]

 iteration 0, train cost 3.5095, valid cost 3.6108
 iteration 50, train cost 2.4659, valid cost 2.5650
 iteration 100, train cost 2.4596, valid cost 2.5607
 iteration 150, train cost 2.4439, valid cost 2.5434
 iteration 200, train cost 2.2830, valid cost 2.3661
 iteration 250, train cost 2.1381, valid cost 2.2040
 iteration 300, train cost 2.0266, valid cost 2.0887
 iteration 350, train cost 1.9714, valid cost 2.0330
 iteration 400, train cost 1.9521, valid cost 2.0156
 iteration 450, train cost 1.9419, valid cost 2.0057
 iteration 500, train cost 1.9313, valid cost 1.9946
 iteration 550, train cost 1.9190, valid cost 1.9810
 iteration 600, train cost 1.9031, valid cost 1.9636
 iteration 650, train cost 1.8865, valid cost 1.9435
 iteration 700, train cost 1.8671, valid cost 1.9187
 iteration 750, train cost 1.8457, valid cost 1.8907
 iteration 800, train cost 1.8280, valid cost 1.8687
 iteration 850, train cost 1.8132, valid cost 1.8518
 iteration 900, train cost 1.7982, valid cost 1.8

In [43]:
#import os 
#os.chdir("../../working")

In [None]:
torch.save(net.state_dict(), "Net_Complete.pt")

In [None]:
np.save("train_curve.npy", np.asarray(train_save))
np.save("valid_curve.npy", np.asarray(valid_save))

In [None]:
while True:
    pass

In [None]:
#Load the model back 😂 (it took one day to understand how to do it!)
#model = Net(ncomp, NN1, NN2, bidi = True).to(device)
#model.load_state_dict(torch.load("Net_Complete.pt"))