# CS284A Final Project - Model and Evaluation Code
# Kai Silkwood, Thao Nguyen

---


## Data Import and Formatting

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import precision_score, recall_score, confusion_matrix, f1_score
import numpy as np

In [3]:
from google.colab import drive
drive.mount('/content/drive/',force_remount=True)

Mounted at /content/drive/


In [4]:
from torch._C import dtype
from torch.utils.data import Dataset

class barcode_dataset(Dataset):
  def __init__(self, X, Y):
    super(barcode_dataset, self).__init__()
    self.X = X
    self.Y = Y

  def __len__(self):
    return self.X.shape[0]

  def __getitem__(self, idx):
    return self.X[idx], self.Y[idx]

In [5]:
#Import training data
zBC1 = np.loadtxt("/content/drive/MyDrive/CS284AProject/train_Pad_0s_BC1.txt")
BC1z = np.loadtxt("/content/drive/MyDrive/CS284AProject/train_Pad_BC1_0s.txt")
zBC2 = np.loadtxt("/content/drive/MyDrive/CS284AProject/train_Pad_0s_BC2.txt")
BC2z = np.loadtxt("/content/drive/MyDrive/CS284AProject/train_Pad_BC2_0s.txt")

zBC1y = torch.load("/content/drive/MyDrive/CS284AProject/train_tensor_y_0s_BC1.pt")
BC1zy = torch.load("/content/drive/MyDrive/CS284AProject/train_tensor_y_BC1_0.pt")
zBC2y = torch.load("/content/drive/MyDrive/CS284AProject/train_tensor_y_0s_BC2.pt")
BC2zy = torch.load("/content/drive/MyDrive/CS284AProject/train_tensor_y_BC2_0s.pt")

#Import testing data
BC1BC1 = np.loadtxt("/content/drive/MyDrive/CS284AProject/signalBC11.txt")
BC1BC2 = np.loadtxt("/content/drive/MyDrive/CS284AProject/signalBC12.txt")
BC2BC2= np.loadtxt("/content/drive/MyDrive/CS284AProject/signalBC22.txt")
BC2BC1 = np.loadtxt("/content/drive/MyDrive/CS284AProject/signalBC21.txt")

In [6]:
BC1BC1.shape

(100, 184)

In [14]:
#Create labels for testing data
ones = np.ones(92)
zeros = np.zeros(92)

zeros_zeros = np.concatenate((zeros, zeros))
ones_zeros = np.concatenate((ones, zeros))
zeros_ones = np.concatenate((zeros, ones))
ones_ones = np.concatenate((ones, ones))

zeros_zeros2D = np.tile(zeros_zeros, (100,1))
ones_zeros2D = np.tile(ones_zeros, (100,1))
zeros_ones2D = np.tile(zeros_ones, (100,1))
ones_ones2D = np.tile(ones_ones, (100,1))

np.array((zeros_zeros2D, ones_ones2D, zeros_zeros2D))

BC1BC1y = torch.tensor(np.tile(np.array((zeros_zeros, ones_ones, zeros_zeros)), (100,1))).reshape([100,3,184])
BC1BC2y = torch.tensor(np.tile(np.array((zeros_zeros, ones_zeros, zeros_ones)),(100,1))).reshape([100,3,184])
BC2BC1y = torch.tensor(np.tile(np.array((zeros_zeros, zeros_ones, ones_zeros)),(100,1))).reshape([100,3,184])
BC2BC2y = torch.tensor(np.tile(np.array((zeros_zeros, zeros_zeros, ones_ones)),(100,1))).reshape([100,3,184])

In [15]:
#Check that shapes are what we expect
BC1BC1y.shape

torch.Size([100, 3, 184])

In [16]:
zBC1y.shape

torch.Size([100, 3, 184])

In [17]:
train_X = torch.from_numpy(np.concatenate((zBC1, BC1z, zBC2, BC2z)))
train_Y = torch.cat((zBC1y, BC1zy, zBC2y, BC2zy))

test_X = torch.from_numpy(np.concatenate((BC1BC1, BC1BC2, BC2BC1, BC2BC2)))
test_Y = torch.cat((BC1BC1y, BC1BC2y, BC2BC1y, BC2BC2y))

In [18]:
train_data = barcode_dataset(train_X, train_Y)
test_data = barcode_dataset(test_X, test_Y)

In [19]:
batch_size = 100
train_data_tc = torch.utils.data.DataLoader(dataset=train_data, batch_size = batch_size, shuffle=True)
test_data_tc = torch.utils.data.DataLoader(dataset=test_data, batch_size = batch_size, shuffle=True)

In [20]:
X,Y= next(iter(train_data_tc))

In [21]:
X.shape

torch.Size([100, 184])

In [22]:
Y.shape

torch.Size([100, 3, 184])

In [23]:
class LSTM_Classifier(nn.Module):
  def __init__(self, input_size = 1, hidden_size = 128, hidden_size2 = 64, dropout_rate = 0.2, num_layers = 2):
    super(LSTM_Classifier, self).__init__()

    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)

    self.layer1 = nn.Sequential(
        nn.Linear(hidden_size, hidden_size2),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
    )

    self.fc = nn.Linear(hidden_size2, 3)
    self.fc2 = nn.Linear(hidden_size, 3)
    self.activation = nn.Softmax(dim=2)

  def forward(self, X):
    batch_size, seq_length = X.size()
    out = X.unsqueeze(-1).to(torch.float32)
    lstm_out, _ = self.lstm(out)
    out = lstm_out.view(batch_size, seq_length, -1)

    #out = self.layer1(out)
    #out = self.fc(out)

    out = self.fc2(out)
    #out = self.activation(out)

    return out.reshape([batch_size, -1, seq_length])

## LSTM

In [78]:
lstm_model2 = LSTM_Classifier()
num_epochs = 250
learning_rate = 0.0001
optimizer = torch.optim.Adam(lstm_model2.parameters(), lr=learning_rate)

weights = torch.tensor([1., 10., 10.])
criterion = nn.CrossEntropyLoss(weight=weights)

total_step = len(train_data_tc)
for epoch in range(num_epochs):
  for i, (X, Y) in enumerate(train_data_tc):

    #Forward pass
    output = lstm_model2(X)
    loss = criterion(output, Y.to(torch.float32))
    #Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  if (epoch+1) % 10==0:
    print(f"Epoch: {epoch+1}/{num_epochs} - Loss: {loss.item()}")

Epoch: 10/250 - Loss: 5.405324935913086
Epoch: 20/250 - Loss: 5.304596424102783
Epoch: 30/250 - Loss: 4.866156578063965
Epoch: 40/250 - Loss: 4.296767234802246
Epoch: 50/250 - Loss: 4.16372537612915
Epoch: 60/250 - Loss: 4.205380439758301
Epoch: 70/250 - Loss: 4.04855489730835
Epoch: 80/250 - Loss: 3.5079832077026367
Epoch: 90/250 - Loss: 3.649627685546875
Epoch: 100/250 - Loss: 3.22391939163208
Epoch: 110/250 - Loss: 3.1400790214538574
Epoch: 120/250 - Loss: 2.5436880588531494
Epoch: 130/250 - Loss: 2.3026633262634277
Epoch: 140/250 - Loss: 2.466386318206787
Epoch: 150/250 - Loss: 2.130204439163208
Epoch: 160/250 - Loss: 1.9149889945983887
Epoch: 170/250 - Loss: 2.1536734104156494
Epoch: 180/250 - Loss: 2.2821309566497803
Epoch: 190/250 - Loss: 2.3024563789367676
Epoch: 200/250 - Loss: 0.7363616824150085
Epoch: 210/250 - Loss: 0.585777997970581
Epoch: 220/250 - Loss: 0.469929039478302
Epoch: 230/250 - Loss: 0.44687265157699585
Epoch: 240/250 - Loss: 0.38582244515419006
Epoch: 250/250 

In [79]:
import itertools
y_pred_list = []
y_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(test_data_tc):
    output = lstm_model2(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        y_pred_list.append(np.argmax(classed))
        y_target_list.append(np.argmax(this_target))

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_target_list = [a.squeeze().tolist() for a in y_target_list]

#Training accuracy
x_pred_list = []
x_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(train_data_tc):
    output = lstm_model2(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        x_pred_list.append(np.argmax(classed))
        x_target_list.append(np.argmax(this_target))

x_pred_list = [a.squeeze().tolist() for a in x_pred_list]

x_target_list = [a.squeeze().tolist() for a in x_target_list]

conf_matrix = confusion_matrix(y_target_list, y_pred_list)
print("Confusion Matrix of the Test Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(y_target_list, y_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(y_target_list, y_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(y_target_list, y_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(y_pred_list, y_target_list)))
print("_______________________")
conf_matrix = confusion_matrix(x_target_list, x_pred_list)
print("Confusion Matrix of the Training Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(x_target_list, x_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(x_target_list, x_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(x_target_list, x_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(x_pred_list, x_target_list)))

Confusion Matrix of the Test Set
-----------------------
[[    0     0     0]
 [ 7476  8390 20934]
 [12258  3627 20915]]
Precision:	0.39816576086956523
Recall:	0.39816576086956523
F1 Score:	0.39816576086956523
Accuracy:	0.39816576086956523
_______________________
Confusion Matrix of the Training Set
-----------------------
[[30550  2598  6852]
 [    0 16800     0]
 [    0     7 16793]]
Precision:	0.8715081521739131
Recall:	0.8715081521739131
F1 Score:	0.8715081521739131
Accuracy:	0.8715081521739131


In [80]:
#Accuracy of just the barcode classes
(8390+20915)/(7476+12258+8390+20934+12258+3627+20915)

0.341319387826411

In [81]:
#Accuracy per length of one barcode rather than per time point
import itertools
y_pred_list = []
y_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(test_data_tc):
    output = lstm_model2(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      row_preds = []
      row_targets = []

      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        row_preds.append(np.argmax(classed))
        row_targets.append(np.argmax(this_target))

        if (col+1)%92==0:
           row_mode_pred = max(set(row_preds), key = row_preds.count)
           row_mode_target = max(set(row_targets), key = row_targets.count)

           y_pred_list.append(row_mode_pred)
           y_target_list.append(row_mode_target)

           row_preds = []
           row_targets =[]

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_target_list = [a.squeeze().tolist() for a in y_target_list]


#Training accuracy
x_pred_list = []
x_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(train_data_tc):
    output = lstm_model2(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      row_preds = []
      row_targets = []
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        row_preds.append(np.argmax(classed))
        row_targets.append(np.argmax(this_target))

        if (col+1)%92==0:
           row_mode_pred = max(set(row_preds), key = row_preds.count)
           row_mode_target = max(set(row_targets), key = row_targets.count)

           x_pred_list.append(row_mode_pred)
           x_target_list.append(row_mode_target)

           row_preds = []
           row_targets = []

x_pred_list = [a.squeeze().tolist() for a in x_pred_list]

x_target_list = [a.squeeze().tolist() for a in x_target_list]

conf_matrix = confusion_matrix(y_target_list, y_pred_list)
print("Confusion Matrix of the Test Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(y_target_list, y_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(y_target_list, y_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(y_target_list, y_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(y_pred_list, y_target_list)))
print("________________________")
conf_matrix = confusion_matrix(x_target_list, x_pred_list)
print("Confusion Matrix of the Training Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(x_target_list, x_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(x_target_list, x_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(x_target_list, x_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(x_pred_list, x_target_list)))

Confusion Matrix of the Test Set
-----------------------
[[  0   0   0]
 [ 73  64 263]
 [ 54  93 253]]
Precision:	0.39625
Recall:	0.39625
F1 Score:	0.39625
Accuracy:	0.39625
________________________
Confusion Matrix of the Training Set
-----------------------
[[400   0   0]
 [  0 200   0]
 [  0   0 200]]
Precision:	1.0
Recall:	1.0
F1 Score:	1.0
Accuracy:	1.0


In [82]:
#Per Barcode Accuracy
(64+253)/(73+64+263+54+93+253)

0.39625

## LSTM-0Weight


In [61]:
lstm_model = LSTM_Classifier()
output = lstm_model(X)

In [62]:
output.shape

torch.Size([100, 3, 184])

In [63]:
num_epochs = 150
learning_rate = 0.0001
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=learning_rate)

weights = torch.tensor([0., 1., 1.])
criterion = nn.CrossEntropyLoss(weight=weights)

In [64]:
criterion(lstm_model(X),Y.to(torch.float32))

tensor(0.5024, grad_fn=<DivBackward1>)

In [65]:
total_step = len(train_data_tc)
for epoch in range(num_epochs):
  for i, (X, Y) in enumerate(train_data_tc):

    #Forward pass
    output = lstm_model(X)
    loss = criterion(output, Y.to(torch.float32))
    #Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  if (epoch+1) % 10==0:
    print(f"Epoch: {epoch+1}/{num_epochs} - Loss: {loss.item()}")

Epoch: 10/150 - Loss: 0.4891889691352844
Epoch: 20/150 - Loss: 0.473030686378479
Epoch: 30/150 - Loss: 0.38718703389167786
Epoch: 40/150 - Loss: 0.34407684206962585
Epoch: 50/150 - Loss: 0.32686954736709595
Epoch: 60/150 - Loss: 0.3209340274333954
Epoch: 70/150 - Loss: 0.32205379009246826
Epoch: 80/150 - Loss: 0.3204686939716339
Epoch: 90/150 - Loss: 0.31925535202026367
Epoch: 100/150 - Loss: 0.3188161253929138
Epoch: 110/150 - Loss: 0.31909269094467163
Epoch: 120/150 - Loss: 0.31839221715927124
Epoch: 130/150 - Loss: 0.31792718172073364
Epoch: 140/150 - Loss: 0.31763744354248047
Epoch: 150/150 - Loss: 0.31800612807273865


In [66]:
import itertools
y_pred_list = []
y_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(test_data_tc):
    output = lstm_model(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        y_pred_list.append(np.argmax(classed))
        y_target_list.append(np.argmax(this_target))

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_target_list = [a.squeeze().tolist() for a in y_target_list]


In [67]:
#Training accuracy
x_pred_list = []
x_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(train_data_tc):
    output = lstm_model(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        x_pred_list.append(np.argmax(classed))
        x_target_list.append(np.argmax(this_target))

x_pred_list = [a.squeeze().tolist() for a in x_pred_list]

x_target_list = [a.squeeze().tolist() for a in x_target_list]

In [68]:
def accuracy(pred, target):
  correct = 0
  for i in range(len(pred)):
    if pred[i]==target[i]:
      correct+=1

  return(correct/len(pred))

In [69]:
conf_matrix = confusion_matrix(y_target_list, y_pred_list)
print("Confusion Matrix of the Test Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(y_target_list, y_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(y_target_list, y_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(y_target_list, y_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(y_pred_list, y_target_list)))

Confusion Matrix of the Test Set
-----------------------
[[    0     0     0]
 [  199 12257 24344]
 [  210 12347 24243]]
Precision:	0.49592391304347827
Recall:	0.49592391304347827
F1 Score:	0.49592391304347827
Accuracy:	0.49592391304347827


In [70]:
conf_matrix = confusion_matrix(x_target_list, x_pred_list)
print("Confusion Matrix of the Training Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(x_target_list, x_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(x_target_list, x_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(x_target_list, x_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(x_pred_list, x_target_list)))

Confusion Matrix of the Training Set
-----------------------
[[    0 10125 29875]
 [    0  4453 12347]
 [    0  4421 12379]]
Precision:	0.22869565217391305
Recall:	0.22869565217391305
F1 Score:	0.22869565217391305
Accuracy:	0.22869565217391305


In [71]:
#Accuracy per length of one barcode rather than per time point
import itertools
y_pred_list = []
y_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(test_data_tc):
    output = lstm_model(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      row_preds = []
      row_targets = []

      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        row_preds.append(np.argmax(classed))
        row_targets.append(np.argmax(this_target))

        if (col+1)%92==0:
           row_mode_pred = max(set(row_preds), key = row_preds.count)
           row_mode_target = max(set(row_targets), key = row_targets.count)

           y_pred_list.append(row_mode_pred)
           y_target_list.append(row_mode_target)

           row_preds = []
           row_targets =[]

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_target_list = [a.squeeze().tolist() for a in y_target_list]


#Training accuracy
x_pred_list = []
x_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(train_data_tc):
    output = lstm_model(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      row_preds = []
      row_targets = []
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        row_preds.append(np.argmax(classed))
        row_targets.append(np.argmax(this_target))

        if (col+1)%92==0:
           row_mode_pred = max(set(row_preds), key = row_preds.count)
           row_mode_target = max(set(row_targets), key = row_targets.count)

           x_pred_list.append(row_mode_pred)
           x_target_list.append(row_mode_target)

           row_preds = []
           row_targets = []

x_pred_list = [a.squeeze().tolist() for a in x_pred_list]

x_target_list = [a.squeeze().tolist() for a in x_target_list]

In [72]:
conf_matrix = confusion_matrix(y_target_list, y_pred_list)
print("Confusion Matrix of the Test Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(y_target_list, y_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(y_target_list, y_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(y_target_list, y_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(y_pred_list, y_target_list)))
print("________________________")
conf_matrix = confusion_matrix(x_target_list, x_pred_list)
print("Confusion Matrix of the Training Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(x_target_list, x_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(x_target_list, x_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(x_target_list, x_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(x_pred_list, x_target_list)))

Confusion Matrix of the Test Set
-----------------------
[[  0 400]
 [  0 400]]
Precision:	0.5
Recall:	0.5
F1 Score:	0.5
Accuracy:	0.5
________________________
Confusion Matrix of the Training Set
-----------------------
[[  0 125 275]
 [  0   0 200]
 [  0   0 200]]
Precision:	0.25
Recall:	0.25
F1 Score:	0.25
Accuracy:	0.25


## LSTM-MLP


In [56]:
class LSTM_Classifier2(nn.Module):
  def __init__(self, input_size = 1, hidden_size = 128, hidden_size2 = 64, dropout_rate = 0.2, num_layers = 2):
    super(LSTM_Classifier2, self).__init__()

    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)

    self.layer1 = nn.Sequential(
        nn.Linear(hidden_size, hidden_size2),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
    )

    self.fc = nn.Linear(hidden_size2, 3)
    self.fc2 = nn.Linear(hidden_size, 3)
    self.activation = nn.Softmax(dim=2)

  def forward(self, X):
    batch_size, seq_length = X.size()
    out = X.unsqueeze(-1).to(torch.float32)
    lstm_out, _ = self.lstm(out)
    out = lstm_out.view(batch_size, seq_length, -1)

    out = self.layer1(out)
    out = self.fc(out)

    #out = self.fc2(out)
    #out = self.activation(out)

    return out.reshape([batch_size, -1, seq_length])
lstm_model = LSTM_Classifier2()
num_epochs = 250
learning_rate = 0.001
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=learning_rate)

weights = torch.tensor([0., 1., 1.])
criterion = nn.CrossEntropyLoss(weight=weights)

total_step = len(train_data_tc)
for epoch in range(num_epochs):
  for i, (X, Y) in enumerate(train_data_tc):

    #Forward pass
    output = lstm_model(X)
    loss = criterion(output, Y.to(torch.float32))
    #Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  if (epoch+1) % 10==0:
    print(f"Epoch: {epoch+1}/{num_epochs} - Loss: {loss.item()}")

Epoch: 10/250 - Loss: 4.612226963043213
Epoch: 20/250 - Loss: 4.54109525680542
Epoch: 30/250 - Loss: 4.014106750488281
Epoch: 40/250 - Loss: 3.912449836730957
Epoch: 50/250 - Loss: 3.8523998260498047
Epoch: 60/250 - Loss: 3.7701637744903564
Epoch: 70/250 - Loss: 3.4728198051452637
Epoch: 80/250 - Loss: 3.272183656692505
Epoch: 90/250 - Loss: 3.5786778926849365
Epoch: 100/250 - Loss: 3.314720392227173
Epoch: 110/250 - Loss: 3.8102211952209473
Epoch: 120/250 - Loss: 3.310243606567383
Epoch: 130/250 - Loss: 2.973111391067505
Epoch: 140/250 - Loss: 3.160113573074341
Epoch: 150/250 - Loss: 3.29512619972229
Epoch: 160/250 - Loss: 3.0569496154785156
Epoch: 170/250 - Loss: 2.7210943698883057
Epoch: 180/250 - Loss: 2.520073890686035
Epoch: 190/250 - Loss: 3.315950393676758
Epoch: 200/250 - Loss: 3.0765509605407715
Epoch: 210/250 - Loss: 2.59672212600708
Epoch: 220/250 - Loss: 3.4845006465911865
Epoch: 230/250 - Loss: 3.0638082027435303
Epoch: 240/250 - Loss: 2.0918092727661133
Epoch: 250/250 - 

In [57]:
import itertools
y_pred_list = []
y_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(test_data_tc):
    output = lstm_model(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        y_pred_list.append(np.argmax(classed))
        y_target_list.append(np.argmax(this_target))

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_target_list = [a.squeeze().tolist() for a in y_target_list]
#Training accuracy
x_pred_list = []
x_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(train_data_tc):
    output = lstm_model(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        x_pred_list.append(np.argmax(classed))
        x_target_list.append(np.argmax(this_target))

x_pred_list = [a.squeeze().tolist() for a in x_pred_list]

x_target_list = [a.squeeze().tolist() for a in x_target_list]


In [58]:
conf_matrix = confusion_matrix(y_target_list, y_pred_list)
print("Confusion Matrix of the Test Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(y_target_list, y_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(y_target_list, y_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(y_target_list, y_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(y_pred_list, y_target_list)))
print("________________________")
conf_matrix = confusion_matrix(x_target_list, x_pred_list)
print("Confusion Matrix of the Training Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(x_target_list, x_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(x_target_list, x_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(x_target_list, x_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(x_pred_list, x_target_list)))

Confusion Matrix of the Test Set
-----------------------
[[    0     0     0]
 [18124 18676     0]
 [19192 17608     0]]
Precision:	0.25375
Recall:	0.25375
F1 Score:	0.25375
Accuracy:	0.25375
________________________
Confusion Matrix of the Training Set
-----------------------
[[36528  1164  2308]
 [    0 11215  5585]
 [    0  6977  9823]]
Precision:	0.7821467391304348
Recall:	0.7821467391304348
F1 Score:	0.7821467391304348
Accuracy:	0.7821467391304348


In [59]:
#Accuracy per length of one barcode rather than per time point
import itertools
y_pred_list = []
y_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(test_data_tc):
    output = lstm_model(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      row_preds = []
      row_targets = []

      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        row_preds.append(np.argmax(classed))
        row_targets.append(np.argmax(this_target))

        if (col+1)%92==0:
           row_mode_pred = max(set(row_preds), key = row_preds.count)
           row_mode_target = max(set(row_targets), key = row_targets.count)

           y_pred_list.append(row_mode_pred)
           y_target_list.append(row_mode_target)

           row_preds = []
           row_targets =[]

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_target_list = [a.squeeze().tolist() for a in y_target_list]


#Training accuracy
x_pred_list = []
x_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(train_data_tc):
    output = lstm_model(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      row_preds = []
      row_targets = []
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        row_preds.append(np.argmax(classed))
        row_targets.append(np.argmax(this_target))

        if (col+1)%92==0:
           row_mode_pred = max(set(row_preds), key = row_preds.count)
           row_mode_target = max(set(row_targets), key = row_targets.count)

           x_pred_list.append(row_mode_pred)
           x_target_list.append(row_mode_target)

           row_preds = []
           row_targets = []

x_pred_list = [a.squeeze().tolist() for a in x_pred_list]

x_target_list = [a.squeeze().tolist() for a in x_target_list]

In [60]:
conf_matrix = confusion_matrix(y_target_list, y_pred_list)
print("Confusion Matrix of the Test Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(y_target_list, y_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(y_target_list, y_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(y_target_list, y_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(y_pred_list, y_target_list)))
print("________________________")
conf_matrix = confusion_matrix(x_target_list, x_pred_list)
print("Confusion Matrix of the Training Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(x_target_list, x_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(x_target_list, x_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(x_target_list, x_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(x_pred_list, x_target_list)))

Confusion Matrix of the Test Set
-----------------------
[[  0   0   0]
 [200 200   0]
 [200 200   0]]
Precision:	0.25
Recall:	0.25
F1 Score:	0.25
Accuracy:	0.25
________________________
Confusion Matrix of the Training Set
-----------------------
[[400   0   0]
 [  0 135  65]
 [  0  29 171]]
Precision:	0.8825
Recall:	0.8825
F1 Score:	0.8825
Accuracy:	0.8825


## LSTM-MLP-0Weight

In [73]:
lstm_model3 = LSTM_Classifier2()
num_epochs = 150
learning_rate = 0.001
optimizer = torch.optim.Adam(lstm_model3.parameters(), lr=learning_rate)

weights = torch.tensor([0., 1., 1.])
criterion = nn.CrossEntropyLoss(weight=weights)

total_step = len(train_data_tc)
for epoch in range(num_epochs):
  for i, (X, Y) in enumerate(train_data_tc):

    #Forward pass
    output = lstm_model3(X)
    loss = criterion(output, Y.to(torch.float32))
    #Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  if (epoch+1) % 10==0:
    print(f"Epoch: {epoch+1}/{num_epochs} - Loss: {loss.item()}")

Epoch: 10/150 - Loss: 0.42681968212127686
Epoch: 20/150 - Loss: 0.3717372417449951
Epoch: 30/150 - Loss: 0.35812875628471375
Epoch: 40/150 - Loss: 0.3967970907688141
Epoch: 50/150 - Loss: 0.35975873470306396
Epoch: 60/150 - Loss: 0.3623797297477722
Epoch: 70/150 - Loss: 0.3503968417644501
Epoch: 80/150 - Loss: 0.3422216475009918
Epoch: 90/150 - Loss: 0.35942938923835754
Epoch: 100/150 - Loss: 0.35415178537368774
Epoch: 110/150 - Loss: 0.35468074679374695
Epoch: 120/150 - Loss: 0.3539099097251892
Epoch: 130/150 - Loss: 0.34324148297309875
Epoch: 140/150 - Loss: 0.34682101011276245
Epoch: 150/150 - Loss: 0.3435319662094116


In [74]:
import itertools
y_pred_list = []
y_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(test_data_tc):
    output = lstm_model3(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        y_pred_list.append(np.argmax(classed))
        y_target_list.append(np.argmax(this_target))

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_target_list = [a.squeeze().tolist() for a in y_target_list]
#Training accuracy
x_pred_list = []
x_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(train_data_tc):
    output = lstm_model3(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        x_pred_list.append(np.argmax(classed))
        x_target_list.append(np.argmax(this_target))

x_pred_list = [a.squeeze().tolist() for a in x_pred_list]

x_target_list = [a.squeeze().tolist() for a in x_target_list]

conf_matrix = confusion_matrix(y_target_list, y_pred_list)
print("Confusion Matrix of the Test Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(y_target_list, y_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(y_target_list, y_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(y_target_list, y_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(y_pred_list, y_target_list)))
print("________________________")
conf_matrix = confusion_matrix(x_target_list, x_pred_list)
print("Confusion Matrix of the Training Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(x_target_list, x_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(x_target_list, x_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(x_target_list, x_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(x_pred_list, x_target_list)))

Confusion Matrix of the Test Set
-----------------------
[[    0     0     0]
 [  998 15673 20129]
 [ 1015 15892 19893]]
Precision:	0.4832336956521739
Recall:	0.4832336956521739
F1 Score:	0.4832336956521739
Accuracy:	0.4832336956521739
________________________
Confusion Matrix of the Training Set
-----------------------
[[ 1135 11064 27801]
 [    0 11564  5236]
 [    0 10556  6244]]
Precision:	0.25737771739130433
Recall:	0.25737771739130433
F1 Score:	0.25737771739130433
Accuracy:	0.25737771739130433


In [75]:
#Accuracy per length of one barcode rather than per time point
import itertools
y_pred_list = []
y_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(test_data_tc):
    output = lstm_model3(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      row_preds = []
      row_targets = []

      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        row_preds.append(np.argmax(classed))
        row_targets.append(np.argmax(this_target))

        if (col+1)%92==0:
           row_mode_pred = max(set(row_preds), key = row_preds.count)
           row_mode_target = max(set(row_targets), key = row_targets.count)

           y_pred_list.append(row_mode_pred)
           y_target_list.append(row_mode_target)

           row_preds = []
           row_targets =[]

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_target_list = [a.squeeze().tolist() for a in y_target_list]


#Training accuracy
x_pred_list = []
x_target_list = []

with torch.no_grad():
  for i, (X,Y) in enumerate(train_data_tc):
    output = lstm_model3(X)
    output = nn.Softmax(dim=1)(output)

    for row in range(output.shape[0]):
      row_preds = []
      row_targets = []
      for col in range(output.shape[2]):
        this_pred = output[row,:,col].detach().numpy()

        classed = (this_pred == this_pred.max()).astype(int)
        this_target = Y[row,:,col].detach().numpy()

        row_preds.append(np.argmax(classed))
        row_targets.append(np.argmax(this_target))

        if (col+1)%92==0:
           row_mode_pred = max(set(row_preds), key = row_preds.count)
           row_mode_target = max(set(row_targets), key = row_targets.count)

           x_pred_list.append(row_mode_pred)
           x_target_list.append(row_mode_target)

           row_preds = []
           row_targets = []

x_pred_list = [a.squeeze().tolist() for a in x_pred_list]

x_target_list = [a.squeeze().tolist() for a in x_target_list]

conf_matrix = confusion_matrix(y_target_list, y_pred_list)
print("Confusion Matrix of the Test Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(y_target_list, y_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(y_target_list, y_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(y_target_list, y_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(y_pred_list, y_target_list)))
print("________________________")
conf_matrix = confusion_matrix(x_target_list, x_pred_list)
print("Confusion Matrix of the Training Set")
print("-----------------------")
print(conf_matrix)
print("Precision:\t"+str(precision_score(x_target_list, x_pred_list, average="micro")))
print("Recall:\t"+str(recall_score(x_target_list, x_pred_list, average="micro")))
print("F1 Score:\t"+str(f1_score(x_target_list, x_pred_list, average="micro")))
print("Accuracy:\t"+str(accuracy(x_pred_list, x_target_list)))

Confusion Matrix of the Test Set
-----------------------
[[ 86 314]
 [ 92 308]]
Precision:	0.4925
Recall:	0.4925
F1 Score:	0.4925
Accuracy:	0.4925
________________________
Confusion Matrix of the Training Set
-----------------------
[[  0 159 241]
 [  0 197   3]
 [  0 196   4]]
Precision:	0.25125
Recall:	0.25125
F1 Score:	0.25125
Accuracy:	0.25125
