In [1]:
import torch
import utils as ut
import numpy as np
import pandas as pd
import torch.nn as nn
from torch import optim
from time import time
import utils as ut
from sklearn.metrics import accuracy_score, precision_score
from sklearn.metrics import recall_score, f1_score, roc_auc_score
from warnings import filterwarnings
filterwarnings('ignore')
device = torch.device("gpu")

def compute_metrics(y, yhat, score1):
    y = y.detach().cpu().numpy()
    yhat = yhat.detach().cpu().numpy()
    score1 = score1.detach().cpu().numpy()

    # macro weighs each class equally
    # micro weights classes based on class prior
    # micro=macro if classes balanced
    # in binary classification; micro=macro
    auc = roc_auc_score(y, score1)
    acc = accuracy_score(y, yhat)
    r1 = recall_score(y, yhat)
    p1 = precision_score(y, yhat)
    f1 = f1_score(y, yhat)
    metrics = {
        'acc': acc,
        'f1': f1,
        'p1': p1,  # precision for class 1
        'r1': r1,  # recall for class 1
        'auc': auc
    }
    return metrics


def train(model, optimizer, loss_fn, train_loader, device):
    ntrain = len(train_loader.dataset)
    model = model.to(device)
    running_loss_tr = 0.0
    model.train()
    for x_tr_b, y_tr_b, _ in train_loader:
        # Forward pass
        x_tr_b = x_tr_b.to(device)
        y_tr_b = y_tr_b.to(device)
        optimizer.zero_grad()
        score_tr_b = model(x_tr_b)
        # loss of each elem in batch
        loss_tr_b = loss_fn(score_tr_b, y_tr_b)
        loss_tr_mean_b = torch.mean(loss_tr_b)

        # Backward and optimize
        loss_tr_mean_b.backward(retain_graph=True)
        optimizer.step()
        running_loss_tr += torch.sum(loss_tr_b)
        loss_tr = running_loss_tr / ntrain
    return model, optimizer, loss_tr

def test(model, loss_fn, val_loader, device):
    nval = len(val_loader.dataset)
    ##### Model Validation ##########
    y_val = []
    score_val = []
    yhat_val = []
    with torch.no_grad():
        model.eval()
        running_loss_val = 0
        for x_val_b, y_val_b, _ in val_loader:
            x_val_b = x_val_b.to(device)
            y_val_b = y_val_b.to(device)

            # Forward pass
            score_val_b = model(x_val_b)  # logits
            _, yhat_val_b = torch.max(score_val_b, 1)
            lossb_val = loss_fn(score_val_b, y_val_b)

            # book keeping at batch level
            running_loss_val += torch.sum(lossb_val)

            y_val.append(y_val_b)
            score_val.append(score_val_b)
            yhat_val.append(yhat_val_b)

        loss_val = running_loss_val / nval
        # loss_val.append(loss_val)

        # predictions and  metrics
        y_val = torch.cat(y_val)
        score_val = torch.cat(score_val)
        yhat_val = torch.cat(yhat_val)

        metrics = compute_metrics(y_val, yhat_val, score_val[:, 1])
    return loss_val, metrics

# mem_params = sum([param.nelement()*param.element_size() for param in model.parameters()])
# mem_bufs = sum([buf.nelement()*buf.element_size() for buf in model.buffers()])
# mem = mem_params + mem_bufs

In [3]:
if __name__=="main":
    base_dir = "data/15m/224/"
    train_dir = base_dir + "test/"
    valid_dir = base_dir + "train/"

    num_epochs=10
    bs = 16

    # dataset and data loader
    dstr = ut.RumexDataset(train_dir)
    dltr = ut.train_loader(dstr, bs)

    dsva = ut.RumexDataset(valid_dir)
    dlva = ut.test_loader(dsva, bs)

    dste = ut.RumexDataset(test_dir)
    dlte = ut.test_loader(dste, bs)

    model = torch.load("results/10m/from_triton/resnet_trainer.pt")
    model = model.model

    loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
model = model.to(device)
for ep in np.arange(num_epochs):
    model, optimizer, loss_tr = train(model, optimizer, loss_fn, dltr, device)
    loss_va, metrics = test(model, loss_fn, dlva, device)
#     print(f"ep:{ep}|loss_tr: {loss_tr:.5f}|loss_va: {loss_va:.5f}")
    
#     if ep % 10 == 0:
    print(f"ep:{ep}|tr: {loss_tr:.5f}|loss: {loss_va:.5f}|acc:{metrics['acc']:.5f}" +
          f"|auc:{metrics['auc']:.5f}|f1:{metrics['f1']:.5f}|p1:{metrics['p1']:.5f}|r1:{metrics['r1']:.5f}")


ep:0|tr: 0.52583|loss: 9.28644|acc:0.73836|auc:0.30018|f1:0.06349|p1:0.22222|r1:0.03704


In [None]:
best_val_loss = np.inf
best_val_acc = 0.5
num_epochs=20
# history= np.zeros((num_epochs, 5))
for ep in np.arange(num_epochs):
    start = time()

    #### fit model ##########
    loss = train(model, dltr, optimizer, scheduler, loss_fn, device)

    ##### Model Validation ##########
    predictions, metrics = validate(model, dlva, loss_fn, device)

    history[ep, 0] = loss # training loss
    history[ep, 1] = metrics["loss"] # validation loss
    history[ep, 2] = metrics["acc"] # validation acc
    history[ep, 3] = metrics["f1"] # validation acc
    history[ep, 4] = metrics["auc"] # validation acc    

    ##### checkpoint saving and logging ##########
    if metrics['loss'] < best_val_loss:
        best_val_loss = metrics['loss']
        ckpt_dict = {'ep': ep,
                     'state_dict': model.state_dict(),
                     'optim_dict': optimizer.state_dict(),
                     'predictions': predictions,
                     'metrics': metrics}    
        ut.save_ckpt(ckpt_dict, log_dir)

    # tensorboad logging
    writer.add_scalar('train/loss', loss, ep)
    for key in metrics.keys():
        name = 'val/'+key
        writer.add_scalar(name, metrics[key], ep)   
    
    
    et = time() - start
    print(f"ep:{ep}|et:{et:.3f}|loss_tr:{loss:.5f}|loss: {metrics['loss']:.5f}" +
          f"|acc:{metrics['acc']:.5f}|re:{metrics['pre']:.5f}" +
          f"|pre:{metrics['recall']:.5f}|f1:{metrics['f1']:.5f}|auc:{metrics['auc']:.5f}")

# np.save(log_dir+"/history.npy", history)

In [None]:
import matplotlib.pyplot as plt
plt.plot(history[:, 0])
plt.plot(history[:, 1])
plt.plot(history[:, 2])