In [1]:
import torch
import gzip
from pathlib import Path
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from src.data import ProteinsDataset, AllVertices, ProteinRecord
from src.nn_model import AmberNN


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
_ = torch.manual_seed(142)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
protein = ["1FZW_B", "1GP2_BG", "2E7J_B", "2E89_A", "2EG5_E", "2NYZ_A",
           "2OOR_A", "3AAA_AB",
           "7FCT_A", "7MX9_A", "7N8G_A", "7QRR_A", "7WUG_145"] 
# "6BOY_BC", <- left it for testing
pd = AllVertices(protein)

n_features = pd[0][0].shape[0]

train_dataset, test_dataset = random_split(pd, [0.85, 0.15],
             generator=torch.Generator().manual_seed(32))

In [4]:
print(len(pd))
print(n_features)


230080
111


In [None]:
# test_dataset[2]

In [5]:
learning_rate = 5e-3
batch_size = 128

# Create data loaders.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

model = AmberNN(n_features, 3, 256).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [6]:
X = torch.stack([train_dataset[333][0], train_dataset[235][0]], axis=0)
X = X.to(device)

In [7]:
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([0, 0], device='cuda:0')


In [8]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X,y) in enumerate(dataloader):
        # Compute prediction and loss
        X = X.to(device)
        y = y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 1000 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            
            X = X.to(device)
            y = y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [9]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 1.136240  [  128/195568]
loss: 0.916356  [128128/195568]
Test Error: 
 Accuracy: 64.5%, Avg loss: 0.814637 

Epoch 2
-------------------------------
loss: 0.877786  [  128/195568]
loss: 0.901410  [128128/195568]
Test Error: 
 Accuracy: 64.7%, Avg loss: 0.802857 

Epoch 3
-------------------------------
loss: 0.847979  [  128/195568]
loss: 0.894674  [128128/195568]
Test Error: 
 Accuracy: 65.1%, Avg loss: 0.795380 

Epoch 4
-------------------------------
loss: 0.851564  [  128/195568]
loss: 0.884816  [128128/195568]
Test Error: 
 Accuracy: 65.3%, Avg loss: 0.790226 

Epoch 5
-------------------------------
loss: 0.837072  [  128/195568]
loss: 0.863919  [128128/195568]
Test Error: 
 Accuracy: 65.5%, Avg loss: 0.784770 

Epoch 6
-------------------------------
loss: 0.835331  [  128/195568]
loss: 0.868621  [128128/195568]
Test Error: 
 Accuracy: 65.6%, Avg loss: 0.779945 

Epoch 7
-------------------------------
loss: 0.851050  [  128/195568]

In [10]:
torch.save(model.state_dict(), "20230301_model.pytorch")

## Test the model

In [None]:
Trial_pdb = "6BOY_B"

In [None]:
data = ProteinRecord(Trial_pdb)

In [None]:
X = data.f.to(device)
p = data.p.to(device)
y = data.y_aux.to(device)

In [None]:
with torch.no_grad():
    logits = model(X)
    pred_probab = nn.Softmax(dim=1)(logits)
    y_pred = pred_probab.argmax(1)
    # print(f"Predicted class: {y_pred}")

In [None]:
save_for_paraview(f"predicted_{Trial_pdb}.csv", p, y_pred)
save_for_paraview(f"real_{Trial_pdb}.csv", p, y)
        

In [None]:
y[580:680]