In [1]:
# import numpy as np
# import pandas as pd
# import sys, re, textwrap
# import pickle
# import lzma

In [None]:
# import time
# import math

In [166]:
# import os
# import random
# from string import ascii_letters

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import random_split
# from unidecode import unidecode

In [3]:
from data_handler import SeqDataset
from model import Model
from torch.utils.data import DataLoader

In [42]:
from src.data_encoding import encode_res, all_resnames,encode_location, selected_locations

# Setup

In [30]:
_ = torch.manual_seed(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [29]:
device

device(type='cuda')

# Defs

In [8]:
nres = 1024

In [9]:
dataset = SeqDataset("../data/data_seq_locations.xz", nres_max=nres)

In [210]:
print("length of the dataset is:", len(dataset))

length of the dataset is: 40011


In [209]:
train_dataset, test_dataset = random_split(dataset, [len(dataset)-2500, 2500])

## Model

In [203]:
from torch.autograd import Variable
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, device):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, 
                            batch_first=True, dropout=0.1)
#         self.gru = nn.GRU(input_size, self.hidden_size, self.num_layers, batch_first=True, dropout=0.2)
#         self.fc1 = nn.Linear(hidden_size, int(hidden_size/2))
#         self.relu = nn.ReLU()
#         self.fc2 = nn.Linear(int(hidden_size/2), int(hidden_size/2))
#         self.fc3 = nn.Linear(int(hidden_size/2), num_classes)
        self.hidden2tag = nn.Linear(self.hidden_size, num_classes)
        self.device = device
    
    def forward(self, x):
        x = x.float()
        h0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size).float()).to(self.device)
        c0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size).float()).to(self.device)
        out, _ = self.lstm(x, (h0,c0)) 
#         out = self.relu(self.fc1(out[:, -1, :]))
#         out = self.relu(self.fc2(out))
#         out = self.fc3(out) 
        tag_space = self.hidden2tag(out[:, -1, :])
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [184]:
n_letters = len(all_resnames)
n_categories = len(selected_locations)
learning_rate = 0.0002

In [204]:
model = RNN(nres, 64, 4, n_categories, device=device)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [199]:
model.parameters

<bound method Module.parameters of RNN(
  (lstm): LSTM(1024, 64, num_layers=4, batch_first=True, dropout=0.1, bidirectional=True)
  (hidden2tag): Linear(in_features=64, out_features=3, bias=True)
)>

### test the model


In [200]:
dataloader = DataLoader(train_dataset,batch_size=12)
X = next(iter(dataloader))[0]

In [205]:
X = X.to(device)
X.shape

torch.Size([12, 26, 1024])

In [206]:
model(X)

tensor([[-1.0955, -1.1213, -1.0795],
        [-1.0969, -1.1208, -1.0786],
        [-1.0953, -1.1209, -1.0800],
        [-1.0948, -1.1231, -1.0784],
        [-1.0958, -1.1211, -1.0794],
        [-1.0938, -1.1227, -1.0798],
        [-1.0957, -1.1226, -1.0781],
        [-1.0959, -1.1211, -1.0793],
        [-1.0950, -1.1214, -1.0799],
        [-1.0939, -1.1219, -1.0805],
        [-1.0961, -1.1232, -1.0770],
        [-1.0928, -1.1250, -1.0786]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward0>)

## Train the model

In [207]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, Y) in enumerate(dataloader):
        # Compute prediction and loss
        x = X.to(device)
        y = Y.to(device)
        pred = model(x)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, Y in dataloader:
            x = X.to(device)
            y = Y.to(device)
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
#             correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
#     correct /= size
#     print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    print(f"Avg loss: {test_loss:>8f} \n")

In [208]:
loss_fn = nn.CrossEntropyLoss()
train_dataloader = DataLoader(train_dataset,batch_size=128)
test_dataloader = DataLoader(test_dataset,batch_size=256)

epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 1.130861  [    0/40000]
loss: 0.784507  [12800/40000]
loss: 0.628886  [25600/40000]
loss: 0.627706  [38400/40000]
Avg loss: 0.581353 

Epoch 2
-------------------------------
loss: 0.638634  [    0/40000]
loss: 0.610565  [12800/40000]
loss: 0.521188  [25600/40000]
loss: 0.578767  [38400/40000]
Avg loss: 0.534286 

Epoch 3
-------------------------------
loss: 0.563192  [    0/40000]
loss: 0.553189  [12800/40000]
loss: 0.474341  [25600/40000]
loss: 0.483876  [38400/40000]
Avg loss: 0.496432 

Epoch 4
-------------------------------
loss: 0.531569  [    0/40000]
loss: 0.475931  [12800/40000]
loss: 0.425616  [25600/40000]
loss: 0.449872  [38400/40000]
Avg loss: 0.475845 

Epoch 5
-------------------------------
loss: 0.516523  [    0/40000]
loss: 0.436519  [12800/40000]
loss: 0.385129  [25600/40000]
loss: 0.426171  [38400/40000]
Avg loss: 0.473970 

Epoch 6
-------------------------------
loss: 0.501593  [    0/40000]
loss: 0.410181  [12800/40

In [102]:
dataloader = DataLoader(train_dataset,batch_size=20)
x,y = next(iter(dataloader))
x = x.to(device)

In [103]:
with torch.no_grad():
    y_pred = model(x)
y_pred
a = torch.rand(2,3)

In [104]:
b = nn.Softmax()
b(y_pred).round()

  b(y_pred).round()


tensor([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
        [1., 0., 0.],
        [0., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]], device='cuda:0')

In [105]:
y

tensor([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 1.],
        [0., 1., 1.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 0.],
        [1., 1., 1.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 1.]], dtype=torch.float64)