# RNN Classifier

In [204]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up before each cell runs without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [210]:
import sys
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader

sys.path.append('../scripts')
import ml_helper as mlh
import rnn

In [206]:
# Choose the compute device: CUDA when PyTorch can see a GPU, otherwise CPU.
# NOTE: a branch selecting Apple's "mps" backend (guarded by
# torch.backends.mps.is_available()) was commented out here, so MPS is
# never selected by this cell.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using {device} device")

Using cpu device


### Data Preparation

In [207]:
# Dataset configuration. min_length / max_length are forwarded unchanged to
# CodonDataset (presumably sequence-length bounds -- confirm in ml_helper).
organism = "E.Coli"
batch_size = 1
min_length = None
max_length = 600

# Padding is only requested when more than one sequence shares a batch;
# with batch_size == 1 no padding position is passed.
if batch_size > 1:
    padding_pos = "right"
else:
    padding_pos = None

# Build the training split and report how many samples it contains.
train_dataset = mlh.CodonDataset(
    organism=organism,
    split="train",
    min_length=min_length,
    max_length=max_length,
    padding_pos=padding_pos,
)
print("Länge train_dataset:", len(train_dataset))

# Shuffled loader used by the training cell.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

Länge train_dataset: 2839


### Hyperparameters and Model Initialization

In [208]:
# Model
# Seed torch's global RNG before the model is constructed so that the weight
# initialisation is repeatable across "Restart Kernel -> Run All". A shuffled
# DataLoader created without its own generator also draws from this RNG, so
# the sample order should become repeatable as well (confirm against
# rnn.train, which is not visible here).
seed = 42
torch.manual_seed(seed)

# Layer sizes come from the helper module's vocabularies: one input feature
# per amino-acid symbol and one output class per codon symbol
# (22 and 65 respectively in the recorded run).
input_dim = len(mlh.amino_acids)
output_dim = len(mlh.codons)
n_hidden = 128

rnnModel = rnn.RNN(
    input_size=input_dim,
    hidden_size=n_hidden,
    output_size=output_dim,
    batch_size=batch_size,
)
print(rnnModel)

RNN(
  (i2h): Linear(in_features=22, out_features=128, bias=False)
  (h2h): Linear(in_features=128, out_features=128, bias=True)
  (h2o): Linear(in_features=128, out_features=65, bias=True)
)


### Training

In [209]:
# Training configuration: number of passes over the data and optimiser step size.
epochs = 5
learning_rate = 0.01

# Cross-entropy objective, optimised with RMSprop over all model parameters.
loss = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(params=rnnModel.parameters(), lr=learning_rate)

# Hand everything to the project's training routine.
rnn.train(
    rnnModel,
    data=train_loader,
    epochs=epochs,
    optimizer=optimizer,
    loss_fn=loss,
    device=device,
)

=> Starting training on device: cpu
loss: 1115.160645  [    1/ 2839]  (0.09s)
loss: 1020.330322  [  201/ 2839]  (4.85s)
loss: 522.081543  [  401/ 2839]  (9.54s)


KeyboardInterrupt: 

In [None]:
# Persist the model via the project helper, passing the model-type tag "rnn"
# and the organism name. The recorded output shows a file name of the form
# "<timestamp>_rnn.pt".
# NOTE(review): in the recorded run the training cell ended with a
# KeyboardInterrupt, so the saved model may be only partially trained -- confirm
# before using this checkpoint.
mlh.save_model(rnnModel, "rnn", organism)

Model saved as 20240519160924_rnn.pt
