In [1]:
import math
import argparse
import pandas as pd
import random

import matplotlib.pyplot as plt
import torch
import numpy as np

from torchvision import transforms, utils
from torch import nn 
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.functional as F

import import_ipynb
import metrics
import data_transformation
import train_validation
import model_architecture
import utils
import unittest
from sklearn.model_selection import train_test_split  

importing Jupyter notebook from metrics.ipynb
importing Jupyter notebook from data_transformation.ipynb
importing Jupyter notebook from train_validation.ipynb
importing Jupyter notebook from model_architecture.ipynb
importing Jupyter notebook from utils.ipynb


In [2]:
def test_modules(module_test):
    """Run unittest in imported files"""
    
    suite = unittest.TestLoader().loadTestsFromModule(module_test)
    unittest.TextTestRunner(verbosity=1).run(suite)

In [3]:
# One test object per helper notebook.  `utils` here is the local utils
# notebook: the later `import utils` replaces the name that
# `from torchvision import transforms, utils` bound earlier in the import cell.
testmodules = [
    data_transformation.data_class_test(),
    metrics.metrics_test(),
    utils.utils_test(),
]

# Run each notebook's tests in turn; the reports are printed below the cell.
for test in testmodules:
    test_modules(test)

..
----------------------------------------------------------------------
Ran 2 tests in 23.866s

OK
..
----------------------------------------------------------------------
Ran 2 tests in 0.006s

OK
..
----------------------------------------------------------------------
Ran 2 tests in 0.114s

OK


In [4]:
# Run configuration read by the data-loader and training cells below.
params = dict(
    batch_size=32,                 # mini-batch size used for both DataLoaders
    save_model_path="test.chkpt",  # checkpoint path passed to the training helpers
    epoch=1,                       # epoch setting passed to the training helpers
)

In [5]:
# Single seed for the train/test split, the weight initialisation and the
# training-batch shuffling, so a Restart & Run All reproduces the same run.
SEED = 42

# --- Data -------------------------------------------------------------------
data_class = data_transformation.data_transformation(path_data = "/data/data_curated_20180219/curated_training_data_no_mass_spec.csv",
                                                     path_mhc = "/data/aligned_mhc_dataset.csv",
                                                     allele_name = "HLA-A*02:01",
                                                     quant_data = True,
                                                     encoding = "one-hot")

pep, mhc, target = data_class.__getitem__()

# Give each array a new length-1 axis at position 1, then concatenate along
# that axis so the peptide and MHC encodings become the two entries of axis 1.
pep = np.expand_dims(pep, axis=1)
mhc = np.expand_dims(mhc, axis=1)
inp = np.hstack((pep, mhc))
print(inp.shape)

X_train, X_test, y_train, y_test = train_test_split(inp, target.T, test_size=0.2, random_state=SEED)

# --- Model ------------------------------------------------------------------
# Seed torch before the model is built so its initial weights (and the
# DataLoader shuffling further down) are repeatable.
torch.manual_seed(SEED)

model = model_architecture.convnet(inputchannel = X_train.shape[3],
                                   L = X_train.shape[2])

pytorch_total_params = sum(p.numel() for p in model.parameters())
print("Model parameters: " + str(pytorch_total_params) )

if torch.cuda.device_count() > 0:
    # Ask for the GPU once: two separate calls could return different ids,
    # leaving the printed id out of sync with the device actually used.
    gpu_id = utils.pick_gpu_lowest_memory()
    print('Using GPU' + str(gpu_id))
    device = torch.device('cuda:' + str(gpu_id))
else:
    print('Using CPU')
    device = torch.device('cpu')

model = model.to(device)
criterion = metrics.select_criterion('MSE')

# Only parameters that require gradients are handed to the optimizer.
optimizer = optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        betas=(0.9, 0.98), eps=1e-09, weight_decay = 0.0)

# --- Datasets and loaders -----------------------------------------------------
# Inputs are cast to float32; targets keep the dtype they arrive with.
# NOTE(review): confirm the training helper casts targets as needed.
train_data = torch.utils.data.TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train))
test_data = torch.utils.data.TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test))

train_dataloader = torch.utils.data.DataLoader(train_data,
                              batch_size = params['batch_size'],
                              shuffle = True,
                              drop_last = True)

# Evaluation batches are not shuffled, so validation metrics are computed on
# the same batches every run.
# NOTE(review): drop_last=True skips the final partial batch of the test split;
# kept because the training helper may assume full batches - confirm and switch
# to False if it does not.
eval_dataloader = torch.utils.data.DataLoader(test_data,
                              batch_size = params['batch_size'],
                              shuffle = False,
                              drop_last = True)

(11705, 2, 34, 20)
Model parameters: 219675
Using CPU


In [None]:
# Launch training through the train_validation notebook helper and unpack the
# four values it returns (the `valid_losss` spelling is kept on purpose).
(valid_losss, train_losses,
 valid_accus, train_accus) = train_validation.start_training(
    params['save_model_path'],
    params['epoch'],
    model,
    train_dataloader,
    eval_dataloader,
    optimizer,
    device,
    criterion,
)

[ Epoch 0 ]


  - (Training)   :  12%|███████▋                                                      | 36/292 [00:58<07:01,  1.65s/it]

In [8]:
# Call the train_validation helper that continues training, with the same
# positional arguments as a fresh run; the four returned values overwrite the
# history variables (the `valid_losss` spelling is kept on purpose).
(valid_losss, train_losses,
 valid_accus, train_accus) = train_validation.continue_training(
    params['save_model_path'],
    params['epoch'],
    model,
    train_dataloader,
    eval_dataloader,
    optimizer,
    device,
    criterion,
)

Checkpoint found and loaded - Resuming training
[ Epoch 2 ]


                                                                                                                       

  - (Training)   ppl: 0.14249686151742935, Accuracy: 0.20002837834907883, elapse: 0.18731647729873657 min


                                                                                                                       

  - (Validation) ppl: 0.10182568430900574, Accuracy: -0.05117330838421105, elapse: 0.012834278742472331 min
[ Epoch 3 ]


                                                                                                                       

  - (Training)   ppl: 0.10645390748977661, Accuracy: 0.31931252712171926, elapse: 0.22762589454650878 min


                                                                                                                       

  - (Validation) ppl: 0.08408648520708084, Accuracy: 0.10670699137871208, elapse: 0.019465947151184083 min
