# RNN Classifier

In [101]:
# Load IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so that edits to the imported helper scripts take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [102]:
import sys
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader

sys.path.append('../scripts')
import ml_helper as mlh
import rnn

In [103]:
# RNN training is way faster on CPU, so the CPU is forced by default.
# Set force_cpu = False to pick the best available accelerator instead
# (CUDA first, then Apple MPS, then CPU as the final fallback).
force_cpu = True

if force_cpu:
    device = torch.device("cpu")
elif torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using {device} device")

Using cpu device


### Data Preparation

In [104]:
# Dataset configuration: organism to train on, optional sequence-length
# bounds, and the batch size used by the loader.
organisms = ["E.Coli", "Drosophila.Melanogaster", "Homo.Sapiens"]
organism = organisms[1]  # Drosophila.Melanogaster
batch_size = 1
min_length = None  # presumably disables the lower length bound - confirm in mlh.CodonDataset
max_length = None  # presumably disables the upper length bound - confirm in mlh.CodonDataset
# Padding is only requested when more than one sequence is batched together.
padding_pos = None if batch_size <= 1 else "right"

train_dataset = mlh.CodonDataset(
    organism=organism,
    split="train",
    min_length=min_length,
    max_length=max_length,
    padding_pos=padding_pos,
)
# Report the number of training samples (the label text is German for "length").
print("Länge train_dataset:", len(train_dataset))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

Länge train_dataset: 17339


### Hyperparameters and Model Initialization

In [105]:
# Model dimensions: the input size follows the number of entries in
# mlh.amino_acids, the output size the number of entries in mlh.codons.
n_hidden = 128
input_dim = len(mlh.amino_acids)
output_dim = len(mlh.codons)

rnnModel = rnn.RNN(
    input_size=input_dim,
    hidden_size=n_hidden,
    output_size=output_dim,
    batch_size=batch_size,
)
# Show the layer layout of the freshly created model.
print(rnnModel)

RNN(
  (i2h): Linear(in_features=22, out_features=128, bias=False)
  (h2h): Linear(in_features=128, out_features=128, bias=True)
  (h2o): Linear(in_features=128, out_features=65, bias=True)
)


### Training

In [106]:
# Train variables
epochs = 1
learning_rate = 0.001
loss = nn.CrossEntropyLoss()

# Optimizers compared in this notebook, keyed by class name and paired with
# any extra keyword arguments. Only the selected one is instantiated, so the
# active choice is explicit instead of depending on which assignment ran last.
optimizer_options = {
    "Adagrad": (optim.Adagrad, {}),
    "Adam": (optim.Adam, {}),
    "RMSprop": (optim.RMSprop, {}),
    "SGD": (optim.SGD, {"momentum": 0.9}),
}
optimizer_name = "SGD"  # change this key to train with a different optimizer
optimizer_cls, optimizer_kwargs = optimizer_options[optimizer_name]
optimizer = optimizer_cls(rnnModel.parameters(), lr=learning_rate, **optimizer_kwargs)

In [107]:
# Run the training routine from the project's rnn module with the loader,
# optimizer, loss function and device configured in the previous cells.
rnn.train(
    rnnModel,
    data=train_loader,
    epochs=epochs,
    optimizer=optimizer,
    loss_fn=loss,
    device=device,
)

=> Starting training on device: cpu
loss:  3.7295  [  200/17339]
loss:  3.3024  [  400/17339]
loss:  2.8757  [  600/17339]
loss:  2.5178  [  800/17339]
loss:  2.2621  [ 1000/17339]
loss:  2.0783  [ 1200/17339]
loss:  1.9434  [ 1400/17339]
loss:  1.8404  [ 1600/17339]
loss:  1.7586  [ 1800/17339]
loss:  1.6922  [ 2000/17339]
loss:  1.6383  [ 2200/17339]
loss:  1.5924  [ 2400/17339]
loss:  1.5536  [ 2600/17339]
loss:  1.5200  [ 2800/17339]
loss:  1.4912  [ 3000/17339]
loss:  1.4658  [ 3200/17339]
loss:  1.4430  [ 3400/17339]
loss:  1.4231  [ 3600/17339]
loss:  1.4048  [ 3800/17339]
loss:  1.3881  [ 4000/17339]
loss:  1.3730  [ 4200/17339]
loss:  1.3592  [ 4400/17339]
loss:  1.3462  [ 4600/17339]
loss:  1.3349  [ 4800/17339]
loss:  1.3245  [ 5000/17339]
loss:  1.3153  [ 5200/17339]
loss:  1.3058  [ 5400/17339]
loss:  1.2977  [ 5600/17339]
loss:  1.2901  [ 5800/17339]
loss:  1.2827  [ 6000/17339]
loss:  1.2761  [ 6200/17339]
loss:  1.2699  [ 6400/17339]
loss:  1.2639  [ 6600/17339]
loss:  

Epoch times for different optimizers (hidden_size=128):
- Adagrad: 92s
- Adam: 90s
- SGD: 89s
- RMSprop: 91s


In [108]:
# Save the trained model; the appendix encodes the hidden size, epoch count,
# learning rate and optimizer class name used for this run.
mlh.save_model(
    rnnModel,
    "rnn",
    organism,
    appendix=f"hidden{n_hidden}_epochs{epochs}_lr{learning_rate}_optim{optimizer.__class__.__name__}",
)

Model saved as 20240521150637_rnn_hidden128_epochs1_lr0.001_optimSGD.pt
