In [1]:
%load_ext autoreload
%autoreload 2

In [13]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from C2AE import C2AE, save_model, load_model, Fe, Fx, Fd, eval_metrics

from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss, accuracy_score, f1_score, precision_score, recall_score
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from skmultilearn.dataset import load_dataset

In [3]:
device = torch.device('cuda')
train_x, train_y, feat_names, label_names = load_dataset('tmc2007_500', 'train')
test_x, test_y, _, _ = load_dataset('tmc2007_500', 'test')

train_dataset = TensorDataset(torch.tensor(train_x.todense(), device=device, dtype=torch.float),torch.tensor(train_y.todense(), device=device,dtype=torch.float))
test_dataset = TensorDataset(torch.tensor(test_x.todense(), device=device, dtype=torch.float), torch.tensor(test_y.todense(), device=device, dtype=torch.float))

tmc2007_500:train - exists, not redownloading
tmc2007_500:test - exists, not redownloading


In [4]:
train_dataset[:][0].shape, train_dataset[:][1].shape, test_dataset[:][0].shape, test_dataset[:][1].shape

(torch.Size([21519, 500]),
 torch.Size([21519, 22]),
 torch.Size([7077, 500]),
 torch.Size([7077, 22]))

### Metrics:

In [6]:
def micro_r(y_t, y_p):
    return recall_score(y_t, y_p, average='micro')
def macro_r(y_t, y_p):
    return recall_score(y_t, y_p, average='macro')
def micro_p(y_t, y_p):
    return precision_score(y_t, y_p, average='micro')
def macro_p(y_t, y_p):
    return precision_score(y_t, y_p, average='macro')
def micro_f1(y_t, y_p):
    return f1_score(y_t, y_p, average='micro')
def macro_f1(y_t, y_p):
    return f1_score(y_t, y_p, average='macro')
def ham_los(*args, **kwargs):
    return hamming_loss(*args, **kwargs)

In [8]:
# Training configs.
num_epochs = 1000
batch_size = 32
lr = 0.001
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# # Scene config
feat_dim = 500
latent_dim = 30
num_labels = 22
fx_h_dim=250
fe_h_dim=25
fd_h_dim=25

# Scene models.
Fx_tmc = Fx(feat_dim, fx_h_dim, fx_h_dim, latent_dim)
Fe_tmc = Fe(num_labels, fe_h_dim, latent_dim)
Fd_tmc = Fd(latent_dim, fd_h_dim, num_labels, fin_act=torch.sigmoid)
               
# Initializing net.
net = C2AE(Fx_tmc, Fe_tmc, Fd_tmc, beta=0.5, alpha=10, emb_lambda=0.01, latent_dim=latent_dim, device=device)
net = net.to(device)


# Doing weight_decay here is eqiv to adding the L2 norm.
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=0)
writer = SummaryWriter(comment='tmc-c2ae')

In [10]:
print("Starting training!")
best_loss = np.infty
for epoch in range(num_epochs+1): 
    # Training.
    net.train()
    loss_tracker = 0.0
    latent_loss_tracker = 0.0
    cor_loss_tracker = 0.0
    for x, y in train_dataloader:
        optimizer.zero_grad()      

        # Pass x, y to network. Retrieve both encodings, and decoding of ys encoding.
        fx_x, fe_y, fd_z = net(x, y)
        # Calc loss.
        l_loss, c_loss = net.losses(fx_x, fe_y, fd_z, y)
        # Normalize losses by batch.
        l_loss /= x.shape[0]
        c_loss /= x.shape[0]
        loss = net.beta*l_loss + net.alpha*c_loss
        loss.backward()
        optimizer.step()
        
        loss_tracker+=loss.item()
        latent_loss_tracker+=l_loss.item()
        cor_loss_tracker+=c_loss.item()
    writer.add_scalar('train/loss', loss_tracker, epoch)
    writer.add_scalar('train/latent_loss', latent_loss_tracker, epoch)
    writer.add_scalar('train/corr_loss', cor_loss_tracker, epoch)
    
    # Evaluation
    net.eval()
    loss_tracker = 0.0
    latent_loss_tracker = 0.0
    cor_loss_tracker = 0.0
    acc_track = 0.0
    for x, y in test_dataloader:
        # evaluation only requires x. As its just Fd(Fx(x))
        fx_x, fe_y = net.Fx(x), net.Fe(y)
        fd_z = net.Fd(fx_x)

        l_loss, c_loss = net.losses(fx_x, fe_y, fd_z, y)
        # Normalize losses by batch.
        l_loss /= x.shape[0]
        c_loss /= x.shape[0]
        loss = net.beta*l_loss + net.alpha*c_loss
        
        latent_loss_tracker += l_loss.item()
        cor_loss_tracker += c_loss.item()
        loss_tracker += loss.item()
        lab_preds = torch.round(net.Fd(net.Fx(x))).cpu().detach().numpy()
        
    print(f"Epoch: {epoch}, Loss: {loss_tracker},  L-Loss: {latent_loss_tracker}, C-Loss: {cor_loss_tracker}")
    torch.save(net.state_dict(), f'./models/tmc_c2ae/{epoch}.pt')
    writer.add_scalar('val/loss', loss_tracker, epoch)
    writer.add_scalar('val/latent_loss', latent_loss_tracker, epoch)
    writer.add_scalar('val/corr_loss', cor_loss_tracker, epoch)
    
    # Log metrics on whole dataset.
    mets = eval_metrics(net, [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r], 
                        [test_dataset, train_dataset], torch.device('cuda'))
    for k, v in mets['dataset_1'].items():
        writer.add_scalar(f'train/{k}', v, epoch)
    for k, v in mets['dataset_0'].items():
        writer.add_scalar(f'val/{k}', v, epoch)

Starting training!
Epoch: 0, Loss: 1603.3459386825562,  L-Loss: 161.7912923693657, C-Loss: 152.24502950906754


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 1, Loss: 1532.5684866905212,  L-Loss: 97.44115042686462, C-Loss: 148.38479125499725
Epoch: 2, Loss: 1458.135027885437,  L-Loss: 68.12967213988304, C-Loss: 142.40701895952225
Epoch: 3, Loss: 1452.509771823883,  L-Loss: 55.51321057975292, C-Loss: 142.47531658411026
Epoch: 4, Loss: 1429.612536430359,  L-Loss: 45.19582210481167, C-Loss: 140.70146322250366
Epoch: 5, Loss: 1405.0588760375977,  L-Loss: 38.17963683605194, C-Loss: 138.59690642356873
Epoch: 6, Loss: 1396.5495734214783,  L-Loss: 35.59750735759735, C-Loss: 137.87508231401443
Epoch: 7, Loss: 1376.5608539581299,  L-Loss: 31.512626834213734, C-Loss: 136.08045452833176
Epoch: 8, Loss: 1378.6399340629578,  L-Loss: 29.310264438390732, C-Loss: 136.39847999811172
Epoch: 9, Loss: 1366.875949382782,  L-Loss: 25.833763487637043, C-Loss: 135.3959070444107
Epoch: 10, Loss: 1378.0896735191345,  L-Loss: 25.846610516309738, C-Loss: 136.516636967659
Epoch: 11, Loss: 1357.6684894561768,  L-Loss: 22.64353458210826, C-Loss: 134.63467210531235


### Picking best model

In [16]:
eval_net = load_model(C2AE, './models/tmc_c2ae/617.pt', Fx=Fx_tmc, Fe=Fe_tmc, Fd=Fd_tmc, device=device).to(device)

In [17]:
mets = eval_metrics(eval_net, [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r], [test_dataset, train_dataset], device)
mets

{'dataset_0': {'ham_los': 0.01547908076097987,
  'accuracy_score': 0.7394376148085346,
  'micro_f1': 0.9205826138535557,
  'micro_p': 0.945956928078017,
  'micro_r': 0.8965340179717587,
  'macro_f1': 0.8301798569314673,
  'macro_p': 0.8486577939541352,
  'macro_r': 0.8277309995802979},
 'dataset_1': {'ham_los': 0.017496166178725778,
  'accuracy_score': 0.7134160509317348,
  'micro_f1': 0.911416501791348,
  'micro_p': 0.9341854295924408,
  'micro_r': 0.8897310615551658,
  'macro_f1': 0.8171750057264234,
  'macro_p': 0.8338519768350644,
  'macro_r': 0.8231754931841647}}