# **Set Env**

In [1]:
%%capture
from google.colab import drive
drive.mount('/content/drive')

from google.colab import userdata
token = userdata.get('GITHUB_TOKEN')
user_name = userdata.get('GITHUB_USERNAME')
mail = userdata.get('GITHUB_MAIL')

!git config --global user.name "{user_name}"
!git config --global user.email "{mail}"
!git clone https://{token}@github.com/nmach22/Promoter-Classification.git
!pip install -r ./Promoter-Classification/requirements.txt

In [2]:
import sys
import torch

# Location of the cloned repository; added to sys.path so the project's
# packages can be imported by the following cells.
ROOT_DIR = '/content/Promoter-Classification'
# Guarded so that re-running this cell does not keep appending duplicates.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [20]:
import os
import yaml

from pathlib import Path

from utils.data_split import dataset_split
from utils.encoding_functions import KmerEncoding
from utils.fasta_dataset import FastaDataset

# The repository checkout is expected directly under the current working directory.
_PATH_TO_ROOT = os.path.join(Path.cwd().absolute(), 'Promoter-Classification')
_DEFAULT_CONFIG_PATH = os.path.join(_PATH_TO_ROOT, 'config', 'config.yaml')

with open(_DEFAULT_CONFIG_PATH, 'r') as f:
    config = yaml.safe_load(f)

bacillus_cfg = config['data']['bacillus']
fasta_positive = bacillus_cfg['promoter_fasta']
# FIX: this previously read 'promoter_fasta' a second time, so the positive and
# negative classes were loaded from the same file.
# NOTE(review): the key name 'non_promoter_fasta' is assumed -- confirm it against
# config/config.yaml and adjust if the negative set is stored under another key.
fasta_negative = bacillus_cfg['non_promoter_fasta']
seq_length = bacillus_cfg['seq_len']

# FASTA paths in the config are joined onto the repository root.
full_pos_path = os.path.join(_PATH_TO_ROOT, fasta_positive)
full_neg_path = os.path.join(_PATH_TO_ROOT, fasta_negative)

# Guard against the copy-paste mistake above coming back unnoticed.
if full_pos_path == full_neg_path:
    raise ValueError(
        f"Positive and negative FASTA paths are identical: {full_pos_path}"
    )

# Encode sequences with a k=3 KmerEncoding and build the train/val/test splits.
k3_encoder = KmerEncoding(k=3)
data = FastaDataset(full_pos_path, full_neg_path, encoding_func=k3_encoder, seq_len=seq_length)
train_ds, val_ds, test_ds = dataset_split(data)

In [21]:
# Quick sanity check: length of the first element of the first test item
# (presumably the encoded sequence -- confirm against FastaDataset).
first_test_item = test_ds[0]
len(first_test_item[0])

81

### 1. Setup Data

In [25]:
batch_size = config['train']['batch_size']

# One loader per split, all with the same batch size; only the training split
# is shuffled, validation and test keep their order.
train_loader, val_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=batch_size, shuffle=reshuffle)
    for split, reshuffle in ((train_ds, True), (val_ds, False), (test_ds, False))
)

### 2. Initialize Model, Loss, and Optimizer

In [26]:
from torch import nn
from models.rnn import PromoterRNN

# Model hyper-parameters.
# NOTE(review): vocab_size=65 presumably corresponds to the 4**3 possible 3-mers
# plus one extra index -- confirm against KmerEncoding.
model = PromoterRNN(
    vocab_size=65,
    embed_dim=16,
    hidden_dim=32,
)

# BCEWithLogitsLoss applies the sigmoid internally, so the model is expected to
# output raw logits.
criterion = nn.BCEWithLogitsLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


### 3. Training Loop

In [None]:
from eval.train_evals import TrainEvals
from utils.train import Train

# Hand everything to the project's Train helper and run it with an argument of 10
# (presumably the number of epochs -- confirm against utils.train.Train).
evaluators = [TrainEvals()]
train = Train(
    model,
    train_loader,
    val_loader,
    optimizer,
    criterion,
    evaluators,
    device,
)
train.train(10)

In [28]:
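# NOTE: legacy hand-written training loop, kept commented out for reference only.
# It appears to have been superseded by the `Train` helper used in the previous cell.
# The saved "Epoch [i/100], Loss: ..." output below matches the print format of this
# loop, so it is stale with respect to the current code.
# epochs = 100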
# for epoch in range(epochs):
#     model.train()
#     epoch_loss = 0
#
#     for batch_x, batch_y in train_loader:
#         # Clear previous gradients
#         optimizer.zero_grad()
#
#         # Forward pass
#         predictions = model(batch_x).squeeze() # remove extra dims
#
#         # Calculate loss (labels must be float for this criterion)
#         loss = criterion(predictions, batch_y.float())
#
#         # Backward pass
#         loss.backward()
#
#         # Update weights
#         optimizer.step()
#
#         epoch_loss += loss.item()
#
#     avg_loss = epoch_loss / len(train_loader)
#     print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

Epoch [1/100], Loss: 0.6935
Epoch [2/100], Loss: 0.6891
Epoch [3/100], Loss: 0.6883
Epoch [4/100], Loss: 0.6870
Epoch [5/100], Loss: 0.6894
Epoch [6/100], Loss: 0.6856
Epoch [7/100], Loss: 0.6864
Epoch [8/100], Loss: 0.6851
Epoch [9/100], Loss: 0.6846
Epoch [10/100], Loss: 0.6847
Epoch [11/100], Loss: 0.6813
Epoch [12/100], Loss: 0.6807
Epoch [13/100], Loss: 0.6790
Epoch [14/100], Loss: 0.6780
Epoch [15/100], Loss: 0.6806
Epoch [16/100], Loss: 0.6807
Epoch [17/100], Loss: 0.6777
Epoch [18/100], Loss: 0.6724
Epoch [19/100], Loss: 0.6693
Epoch [20/100], Loss: 0.6795
Epoch [21/100], Loss: 0.6676
Epoch [22/100], Loss: 0.6715
Epoch [23/100], Loss: 0.6641
Epoch [24/100], Loss: 0.6650
Epoch [25/100], Loss: 0.6667
Epoch [26/100], Loss: 0.6595
Epoch [27/100], Loss: 0.6604
Epoch [28/100], Loss: 0.6554
Epoch [29/100], Loss: 0.6601
Epoch [30/100], Loss: 0.6567
Epoch [31/100], Loss: 0.6545
Epoch [32/100], Loss: 0.6589
Epoch [33/100], Loss: 0.6569
Epoch [34/100], Loss: 0.6596
Epoch [35/100], Loss: 0