In [None]:
%load_ext autoreload
%autoreload 2

# C2AE Architecture
* X:
    * (N, d)
* Y:
    * (N, m)
* Z:
    * (N, l)

## Three main components:
* Fx:
    * Encodes x into latent space z.
* Fe:
    * Encodes y into latent space z.
* Fd:
    * Decodes z into label space. 

## Loss functions:

$$L_1 = ||F_x(X) - F_e(Y)||^2 s.t. F_x(X)Fx(X)^T = F_e(Y)F_e(Y)^T = I$$
$$L_2 = \Gamma(F_e, F_d) = \Sigma_i^N E_i$$
$$E_i = \frac{1}{|y_i^1||y_i^0|} \Sigma_{p,q \in y_i^1\times y_i^0} e^{F_d(F_e(y_i))^q - F_d(F_e(y_I))^p}$$

## Combined Loss:
$$L_1 + \alpha L_2$$

In [47]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from C2AE import C2AE, save_model, load_model, Fe, Fx, Fd, eval_metrics

from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss, accuracy_score, f1_score, precision_score, recall_score
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from skmultilearn.dataset import load_dataset

In [48]:
device = torch.device('cuda')
train_x, train_y, feat_names, label_names = load_dataset('scene', 'train')
test_x, test_y, _, _ = load_dataset('scene', 'test')

train_dataset = TensorDataset(torch.tensor(train_x.todense(), device=device, dtype=torch.float),torch.tensor(train_y.todense(), device=device,dtype=torch.float))
test_dataset = TensorDataset(torch.tensor(test_x.todense(), device=device, dtype=torch.float), torch.tensor(test_y.todense(), device=device, dtype=torch.float))

scene:train - exists, not redownloading
scene:test - exists, not redownloading


In [49]:
train_dataset[:][0].shape, train_dataset[:][1].shape, test_dataset[:][0].shape, test_dataset[:][1].shape

(torch.Size([1211, 294]),
 torch.Size([1211, 6]),
 torch.Size([1196, 294]),
 torch.Size([1196, 6]))

### Metrics:

In [77]:
def micro_r(y_t, y_p):
    return recall_score(y_t, y_p, average='micro')
def macro_r(y_t, y_p):
    return recall_score(y_t, y_p, average='macro')
def micro_p(y_t, y_p):
    return precision_score(y_t, y_p, average='micro')
def macro_p(y_t, y_p):
    return precision_score(y_t, y_p, average='macro')
def micro_f1(y_t, y_p):
    return f1_score(y_t, y_p, average='micro')
def macro_f1(y_t, y_p):
    return f1_score(y_t, y_p, average='macro')
def ham_los(*args, **kwargs):
    return hamming_loss(*args, **kwargs)

In [78]:
# Training configs.
num_epochs = 1000
batch_size = 32
lr = 0.001
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# # Scene config
feat_dim = 294
latent_dim = 5
num_labels = 6
fx_h_dim=20
fe_h_dim=20
fd_h_dim=50

# Scene models.
Fx_scene = Fx(feat_dim, fx_h_dim, fx_h_dim, latent_dim)
Fe_scene = Fe(num_labels, fe_h_dim, latent_dim)
Fd_scene = Fd(latent_dim, fd_h_dim, num_labels, fin_act=torch.sigmoid)
               
# Initializing net.
net = C2AE(Fx_scene, Fe_scene, Fd_scene, beta=0.5, alpha=10, emb_lambda=0.01, latent_dim=latent_dim, device=device)
net = net.to(device)


# Doing weight_decay here is eqiv to adding the L2 norm.
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
writer = SummaryWriter(comment='scene-c2ae')

In [79]:
print("Starting training!")
for epoch in range(num_epochs+1): 
    # Training.
    net.train()
    loss_tracker = 0.0
    latent_loss_tracker = 0.0
    cor_loss_tracker = 0.0
    for x, y in train_dataloader:
        optimizer.zero_grad()      

        # Pass x, y to network. Retrieve both encodings, and decoding of ys encoding.
        fx_x, fe_y, fd_z = net(x, y)
        # Calc loss.
        l_loss, c_loss = net.losses(fx_x, fe_y, fd_z, y)
        # Normalize losses by batch.
        l_loss /= x.shape[0]
        c_loss /= x.shape[0]
        loss = net.beta*l_loss + net.alpha*c_loss
        loss.backward()
        optimizer.step()
        
        loss_tracker+=loss.item()
        latent_loss_tracker+=l_loss.item()
        cor_loss_tracker+=c_loss.item()
    writer.add_scalar('train/loss', loss_tracker, epoch)
    writer.add_scalar('train/latent_loss', latent_loss_tracker, epoch)
    writer.add_scalar('train/corr_loss', cor_loss_tracker, epoch)
    
    # Evaluation
    net.eval()
    loss_tracker = 0.0
    latent_loss_tracker = 0.0
    cor_loss_tracker = 0.0
    acc_track = 0.0
    for x, y in test_dataloader:
        # evaluation only requires x. As its just Fd(Fx(x))
        fx_x, fe_y = net.Fx(x), net.Fe(y)
        fd_z = net.Fd(fx_x)

        l_loss, c_loss = net.losses(fx_x, fe_y, fd_z, y)
        # Normalize losses by batch.
        l_loss /= x.shape[0]
        c_loss /= x.shape[0]
        loss = net.beta*l_loss + net.alpha*c_loss
        
        latent_loss_tracker += l_loss.item()
        cor_loss_tracker += c_loss.item()
        loss_tracker += loss.item()
        lab_preds = torch.round(net.Fd(net.Fx(x))).cpu().detach().numpy()
        
    print(f"Epoch: {epoch}, Loss: {loss_tracker},  L-Loss: {latent_loss_tracker}, C-Loss: {cor_loss_tracker}")
    torch.save(net.state_dict(), f'./models/scene/scene_c2ae/v3_{epoch}.pt')
    writer.add_scalar('val/loss', loss_tracker, epoch)
    writer.add_scalar('val/latent_loss', latent_loss_tracker, epoch)
    writer.add_scalar('val/corr_loss', cor_loss_tracker, epoch)
    
    # Log metrics on whole dataset.
    mets = eval_metrics(net, [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r], 
                        [test_dataset, train_dataset], torch.device('cuda'))
    for k, v in mets['dataset_1'].items():
        writer.add_scalar(f'train/{k}', v, epoch)
    for k, v in mets['dataset_0'].items():
        writer.add_scalar(f'val/{k}', v, epoch)

Starting training!
Epoch: 0, Loss: 383.4921340942383,  L-Loss: 3.1638980265706778, C-Loss: 38.19101822376251
Epoch: 1, Loss: 382.1139135360718,  L-Loss: 1.6566793415695429, C-Loss: 38.128557562828064
Epoch: 2, Loss: 382.2040843963623,  L-Loss: 3.001723300665617, C-Loss: 38.07032233476639
Epoch: 3, Loss: 382.06808853149414,  L-Loss: 4.195376731455326, C-Loss: 37.997039914131165
Epoch: 4, Loss: 382.2564105987549,  L-Loss: 5.8342824429273605, C-Loss: 37.933926820755005
Epoch: 5, Loss: 384.197003364563,  L-Loss: 10.322519063949585, C-Loss: 37.90357440710068
Epoch: 6, Loss: 386.89673137664795,  L-Loss: 16.923314929008484, C-Loss: 37.84350764751434
Epoch: 7, Loss: 388.9680995941162,  L-Loss: 23.168397426605225, C-Loss: 37.738390266895294
Epoch: 8, Loss: 392.5284299850464,  L-Loss: 28.516899675130844, C-Loss: 37.82699805498123
Epoch: 9, Loss: 392.86380100250244,  L-Loss: 33.38949924707413, C-Loss: 37.616905093193054
Epoch: 10, Loss: 394.351601600647,  L-Loss: 37.51728665828705, C-Loss: 37.559

### Picking best model

In [91]:
eval_net = load_model(C2AE, './models/scene/scene_c2ae/v3_299.pt', Fx=Fx_scene, Fe=Fe_scene, Fd=Fd_scene, device=device).to(device)

In [92]:
mets = eval_metrics(net, [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r], [test_dataset, train_dataset], device)
mets

{'dataset_0': {'ham_los': 0.10089186176142698,
  'accuracy_score': 0.612876254180602,
  'micro_f1': 0.6978297161936561,
  'micro_p': 0.7620783956244302,
  'micro_r': 0.6435719784449576,
  'macro_f1': 0.7001454295863437,
  'macro_p': 0.7675026918519365,
  'macro_r': 0.6530980687586226},
 'dataset_1': {'ham_los': 0.05560143132397467,
  'accuracy_score': 0.7663088356729976,
  'micro_f1': 0.8319467554076538,
  'micro_p': 0.8944543828264758,
  'micro_r': 0.7776049766718507,
  'macro_f1': 0.8430357000544705,
  'macro_p': 0.9024568196453421,
  'macro_r': 0.8004112815170829}}