# Importing Data from Allen Visual Behavior Dataset

In [21]:
#load useful packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('notebook', font_scale=1.5, rc={'lines.markeredgewidth': 2})
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
#download datafram file
!wget --content-disposition https://ndownloader.figshare.com/files/28470255

In [3]:
#set the dataframe
filename = "allen_visual_behavior_2p_change_detection_familiar_novel_image_sets.parquet"
data = pd.read_parquet(filename)

# Extracting Train and Test Data

In [4]:
#Extraction of Novel and Familiar data in two variables
familiar_data_pr = data[(data.cre_line=='Vip-IRES-Cre')&(data.exposure_level=="familiar")&(data.omitted==False)]
novel_data_pr=data[(data.cre_line=='Vip-IRES-Cre')&(data.exposure_level=="novel")&(data.omitted==False)]

In [None]:
#Set label 1 for familiar pictures and concatenate it with variable
labels = np.ones_like(familiar_data_pr.trace.values)
familiar_data_pr["labels"] = labels

In [None]:
#Set label 0 for novel pictures and concatenate it with variable
novellabels = np.zeros_like(novel_data_pr.trace.values)
novel_data_pr["labels"] = novellabels

In [7]:
#Concat two Tables and Randomize them
concated = pd.concat([novel_data_pr,familiar_data_pr])                 #concat
gg = concated.sample(frac=1)                                           #Randomize

In [8]:
#Setting the Timesteps(index) for extracting features
t_start_indx=0
t_end_indx=-1

In [9]:
#Import Calcium Trace values and extracr our selected Features within our Start and End Index
imput_data = gg.trace.values
new_input=[]
for i in range(len(imput_data)):
  zz = imput_data[i][t_start_indx:t_end_indx]
  new_input.append(zz)

In [10]:
#Extracting Labels from randomize table
imputlabel_data = gg.labels.values

In [11]:
#Divide Data in Train and Test Dataset
percent = 0.7                                                     #the Amount of Division for train
train_x=new_input[0:int(percent*len(new_input))]
train_y=imputlabel_data[0:int(percent*len(new_input))]
test_x = new_input[int(percent*len(new_input)):-1]
test_y = imputlabel_data[int(percent*len(new_input)):-1]

In [12]:
#Convert Floats to int for Sklearn Labels
train_y =train_y.astype(np.int)
test_y =test_y.astype(np.int)

# Model Creation

Logistic Regression

In [13]:
#Logistic Regression
accoflog = cross_val_score(LogisticRegression(penalty='l1',max_iter=50000,solver="liblinear"), train_x, train_y, cv=8) # k=8 crossvalidation

In [15]:
print(f"Different Folds Accuracy: {accoflog} and Mean accuracy of {np.mean(accoflog)}")

Different Folds Accuracy: [0.68268379 0.69818423 0.6840124  0.6840124  0.68312666 0.68179805
 0.68932684 0.67759079] and Mean accuracy of 0.6850918954827281


Linead Discrimination Analyze

In [16]:
#Linear Discrimination Analyze
accoflda = cross_val_score(LDA(), train_x, train_y, cv=8)

In [17]:
print(f"Different Folds Accuracy: {accoflda} and Mean accuracy of {np.mean(accoflda)}")

Different Folds Accuracy: [0.67426926 0.68600531 0.67404783 0.670062   0.67094774 0.67139061
 0.67670505 0.67139061] and Mean accuracy of 0.6743523029229407


Random Forest

In [31]:
#Random Forest
accofrandomforest= cross_val_score(RandomForestClassifier(max_depth=50, random_state=0,criterion="entropy"), train_x, train_y, cv=8) # k=8 crossvalidation

In [None]:
print(f"Different Folds Accuracy: {accofrandomforest} and Mean accuracy of {np.mean(accofrandomforest)}")

Neural Network

In [22]:
#creating Model Artitcture
class Machine(nn.Module):
  def __init__(self):
     super().__init__()
     self.encoder=nn.Sequential(
         nn.Linear(84,32),
         nn.ReLU(),
         nn.Linear(32,16),
         nn.ReLU(),
         nn.Linear(16,8),
         nn.ReLU(),
         nn.Linear(8,1),
         nn.Sigmoid()
      )
  def forward(self,x):
    result = self.encoder(x)
    return result

In [23]:
#changing Shape to [1,...]
train_y=np.reshape(train_y,(-1,1))
test_y=np.reshape(test_y,(-1,1))

In [24]:
#train_data_loader_class
class train_data(Dataset):
  def __init__(self,x,y):
    self.x=torch.from_numpy(np.array(x)).float()
    self.y = torch.from_numpy(y).float()
  def __getitem__(self,indx):
    return self.x[indx],self.y[indx]
  
  def __len__(self):
    return len(self.x)

#eval_data_loader_class
class eval_data(Dataset):
  def __init__(self,x,y):
    self.x=torch.from_numpy(np.array(x)).float()
    self.y = torch.from_numpy(y).float()
  def __getitem__(self,indx):
    return self.x[indx],self.y[indx]
  
  def __len__(self):
    return len(self.x)

In [25]:
#Divide Data
trainsets=train_data(train_x,train_y)
testsets = eval_data(test_x,test_y)
trainloader = DataLoader(trainsets,batch_size=100,shuffle=False)
testloader = DataLoader(testsets,batch_size=1,shuffle=False)

In [26]:
#Setting Parametrs
lr = 0.001          #learning Rate
epochs=20           #number of Epochs
model = Machine()
criterion = nn.BCELoss()          #setting Loss
optimizer=torch.optim.Adam(model.parameters(),lr=lr)      #setting Optimization

In [27]:
#Training Process
accu = []
for j in range(epochs):
 
  for i,(x,y) in enumerate(trainloader):
    resulted = model(x)
    loss = criterion(resulted,y)
    #acctemp.append(resulted.detach().numpy())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  
  print(f'Epochs: {j+1}, Loss: {loss.item():.4f}')

Epochs: 1, Loss: 0.5106
Epochs: 2, Loss: 0.4908
Epochs: 3, Loss: 0.4774
Epochs: 4, Loss: 0.4718
Epochs: 5, Loss: 0.4683
Epochs: 6, Loss: 0.4657
Epochs: 7, Loss: 0.4642
Epochs: 8, Loss: 0.4612
Epochs: 9, Loss: 0.4593
Epochs: 10, Loss: 0.4559
Epochs: 11, Loss: 0.4536
Epochs: 12, Loss: 0.4510
Epochs: 13, Loss: 0.4508
Epochs: 14, Loss: 0.4472
Epochs: 15, Loss: 0.4436
Epochs: 16, Loss: 0.4447
Epochs: 17, Loss: 0.4405
Epochs: 18, Loss: 0.4374
Epochs: 19, Loss: 0.4331
Epochs: 20, Loss: 0.4299


In [28]:
#Validate Data
y_pred_list = []
y_pred_new = []
thershold = 0.5         #setting thereshold
with torch.no_grad():

 
  for i,(x,y) in enumerate(testloader):
     resulted = model(x)
     y_pred_list.append(resulted)
     if resulted >= thershold :
       resulted = 1
     else:
       resulted=0
     loss =abs(y-resulted) 
     loss = loss.detach().numpy()
     y_preds = resulted
     y_pred_new.append((loss))
     y_pred_list.append(y_preds)

In [29]:
#Changind Data Shape
outpout_labels =np.array(y_pred_new)
outpout_labels=np.squeeze(outpout_labels)
outpout_labels = np.expand_dims(outpout_labels,axis=1)

In [31]:
#Compute Accuracy
accofdnn=np.sum(abs(outpout_labels-test_y))/len(outpout_labels)
print(f"Accuracy of Neural Netork is {accofdnn}")

Accuracy of Neural Netork is 0.6753213201575922


Ensemble Model

In [32]:
#Linead Discrimination Analyze
model1 = LDA()
model1.fit(train_x,train_y)
#Logistic Regression
model2 = LogisticRegression(penalty='l1',max_iter=50000,solver="liblinear")
model2.fit(train_x,train_y)
#Random Forest
model3 = RandomForestClassifier(max_depth=50, random_state=0,criterion="entropy")
model3.fit(train_x,train_y)

In [33]:
#Output Result of Total 3 models
results_out = model1.predict(test_x) + model2.predict(test_x) + model3.predict(test_x)

In [33]:
#setting Voting System
finalres =results_out
y_pred=[]
for i in range(len(test_x)):
    if finalres[i]==0:
      finalres[i]=0
    elif finalres[i]==1:
         finalres[i]=0
    elif finalres[i]==2:
         finalres[i]=1
    elif finalres[i]==3:
         finalres[i]=1

In [33]:
#Computing Error
number_corr=finalres[finalres==test_y]
accuracy_ensemble = len(number_corr)/len(finalres)

In [33]:
print(f"Accracy of Ensemble Model is {accuracy_ensemble}")