In [1]:
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision import transforms,utils
import time 


In [2]:
sys.path.append("lib/")
import model
import data_handle
import random
random.seed(1)

### Hyper and Path

In [3]:
input_dim = 27  # Number of amino acids (replace with your actual input dimension)
output_dim = 2  # Binary labels
hidden_dim = 128
num_layers = 1
num_heads = 8
EPOCH = 300
PRETRAIN = False
ALPHA = 20
alpha = 1/ALPHA
MODELNAME = f"Alpha{ALPHA}_8Head_128Hidden_Start_end"
# latest_model -> Path to pretrained weight
latest_model = None
if PRETRAIN :
    START,_ = latest_model.split('.')
    _,START = START.split("_")
    START = int(START)
else:
    START= 0
TRAIN = "data/ATP_train.txt"
TEST = "data/ATP_test.txt"

In [4]:
import os
os.system(f"mkdir Model\{MODELNAME}")

0

In [5]:
train_set = data_handle.ProteinDataset(TRAIN)
train_set,val_set = torch.utils.data.random_split(train_set,[280,67])
train_loader = data.DataLoader(train_set, batch_size=1, shuffle=True)
val_loader = data.DataLoader(val_set, batch_size=1, shuffle=False)
test_set = data_handle.ProteinDataset(TEST)

### Check Device

In [6]:
if torch.cuda.is_available():
    # Initialize CUDA device
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

CUDA is available. Using GPU.


### Load and Save Model

In [7]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'state_dict': model.state_dict(),
             'optimizer' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    #print('model saved to %s' % checkpoint_path)
    
def load_checkpoint(checkpoint_path, model, optimizer):
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    #print('model loaded from %s' % checkpoint_path)

### Define Training Function

In [8]:
def train_model(model,train_loader,val_loader,optimizer,epochs): 
    val_log = []
    train_log=[]
    acc_log = [] 
    rc_log = []
    weights = torch.tensor([alpha,1-alpha]).to(device)
    criterion = nn.CrossEntropyLoss(weight=weights)
    c_time = time.time()
    N_train = len(train_loader)
    for epoch in range(epochs):
        total_loss = 0 
        for sequences, labels in train_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(sequences)
            y_tensor = torch.tensor(labels, dtype=torch.long)
            loss = criterion(outputs.view(-1, output_dim), y_tensor.view(-1))
            #loss = criterion(outputs.view(-1, output_dim), labels.view(-1))
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        #record  
        acc,rc,val_loss = test(model,val_loader,criterion)
        acc_log.append(acc)
        rc_log.append(rc)
        val_log.append(val_loss)
        train_log.append(total_loss/N_train)
        print(f"[{epoch}/{epochs}] Epoch {epoch+1}, Average Train Loss: {total_loss/N_train} , Average Validation Loss:{val_loss}",end='\r')
    

        if (epoch+1)%5 == 0:
            path = f"Model/{MODELNAME}/{MODELNAME}_{epoch+1+START}.pth"
            save_checkpoint(path,model,optimizer)
        
        
    print("\ntime spent: ",time.time()-c_time)

    return acc_log,rc_log,train_log,val_log

def test(model,data_loader,criterion):
    model.eval()
    total_loss = 0
    N = len(data_loader.dataset)
    #turn off gradient calculation, for computation speed
    tp=fp=tn=fn = 0 

    with torch.no_grad(): 
        for data,target in data_loader:
            data,target = data.to(device),target.to(device)
            outputs = model(data)
            y_tensor = torch.tensor(target, dtype=torch.long)
            loss = criterion(outputs.view(-1, output_dim), y_tensor.view(-1))
            total_loss += loss.item()
            #Evaluate on accuracy 
            pred = outputs.argmax(dim=2)
            c = pred/y_tensor
            tp += torch.sum(c == 1).item()
            fp += torch.sum(c == float('inf')).item()
            tn += torch.sum(torch.isnan(c)).item()
            fn += torch.sum(c == 0).item()
        average_loss = total_loss/len(data_loader.dataset)
        acc = (tp+tn)/(tp+tn+fn+fp)
        recall = (tp)/(tp+fn)
    #pred = outputs.argmax(dim=2)
    #print(f"Average Loss Validation {N} Image: {average_loss}")
    #print(f"Accuracy = {tp/N}")
    return (acc,recall,average_loss)

### Load the Latest Model and continue Training (Or Train from Scratch)

In [9]:
model = model.TransformerClassifier(input_dim, hidden_dim, output_dim, num_layers, num_heads)
optimizer = optim.SGD(model.parameters(), lr=0.01)
if PRETRAIN:
    load_checkpoint(latest_model,model,optimizer)



### Start Training

In [10]:
model.to(device)
acc_log,rc_log,train_log,val_log = train_model(model,train_loader,val_loader,optimizer,EPOCH)

  return torch.IntTensor(sequence), torch.tensor(label)
  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)
  y_tensor = torch.tensor(labels, dtype=torch.long)
  y_tensor = torch.tensor(target, dtype=torch.long)


[299/300] Epoch 300, Average Train Loss: 0.09695379939283677 , Average Validation Loss:3.9963394087641997
time spent:  189.74892330169678


In [11]:
import pickle as pk
'''
TRAIN_LOSS = f"loss_log/train_loss_{EPOCH+START}"
VAL_LOSS = f"loss_log/val_loss_{EPOCH+START}"
with open(TRAIN_LOSS, 'wb') as f:
    pk.dump(train_log, f)
with open(VAL_LOSS, 'wb') as f:
    pk.dump(val_log, f)
    '''

'\nTRAIN_LOSS = f"loss_log/train_loss_{EPOCH+START}"\nVAL_LOSS = f"loss_log/val_loss_{EPOCH+START}"\nwith open(TRAIN_LOSS, \'wb\') as f:\n    pk.dump(train_log, f)\nwith open(VAL_LOSS, \'wb\') as f:\n    pk.dump(val_log, f)\n    '

In [12]:
meta = {}
meta['input_dim'] = input_dim
meta['output_dim'] = output_dim
meta['hidden_dim'] = hidden_dim
meta['num_layers'] = num_layers
meta['num_heads'] = num_heads
train_result = {}
train_result['acc_log'] = acc_log
train_result['rc_log'] = rc_log
train_result['train_log'] = train_log
train_result['val_log'] = val_log
train_result['meta'] = meta
META = f'loss_log/{MODELNAME}'
with open(META, 'wb') as f:
    pk.dump(train_result, f)
print(MODELNAME)

Alpha20_8Head_128Hidden_Start_end
