In [101]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [154]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision as tv

import tensorflow as tf

from C2AE import C2AE, save_model, load_model, Fe, Fx, Fd, eval_metrics

from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss, accuracy_score, f1_score, precision_score, recall_score
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from scipy.io import arff

import arff as arff2

# C2AE Architecture
* X:
    * (N, d)
* Y:
    * (N, m)
* Z:
    * (N, l)

## Three main components:
* Fx:
    * Encodes x into latent space z.
* Fe:
    * Encodes y into latent space z.
* Fd:
    * Decodes z into label space. 

## Loss functions:

$$L_1 = \|F_x(X) - F_e(Y)\|^2 \quad \text{s.t.} \quad F_x(X)F_x(X)^T = F_e(Y)F_e(Y)^T = I$$
$$L_2 = \Gamma(F_e, F_d) = \sum_{i=1}^{N} E_i$$
$$E_i = \frac{1}{|y_i^1||y_i^0|} \sum_{(p,q) \in y_i^1 \times y_i^0} e^{F_d(F_e(y_i))^q - F_d(F_e(y_i))^p}$$

## Combined Loss:
$$L_1 + \alpha L_2$$

In [4]:
# Parse the NUS-WIDE train/test ARFF files. Each file is opened in a `with`
# block so the handle is closed as soon as parsing finishes (the bare
# `open(...)` calls previously left both descriptors open for the kernel's lifetime).
with open('./nus/nus-wide-full-cVLADplus-train.arff') as train_fp:
    nus_train = arff2.load(train_fp)
with open('./nus/nus-wide-full-cVLADplus-test.arff') as test_fp:
    nus_test = arff2.load(test_fp)

In [161]:
def _features_and_labels(records, n_features=128, max_samples=2**10):
    """Split raw ARFF rows into feature and label matrices.

    Column 0 is skipped (presumably an identifier column -- TODO confirm),
    the next `n_features` columns are the features and every remaining column
    is a label. Rows whose label vector sums to zero are dropped, and at most
    `max_samples` of the surviving rows are kept, in their original order.

    Args:
        records: sequence of rows, e.g. the 'data' entry of a loaded ARFF file.
        n_features: number of feature columns following column 0.
        max_samples: upper bound on the number of rows returned.

    Returns:
        (features, labels): two float64 arrays with the same number of rows.
    """
    # Convert once and slice twice, instead of re-building the array per slice.
    table = np.array(records)
    features = table[:, 1:1 + n_features].astype('float64')
    labels = table[:, 1 + n_features:].astype('float64')
    # Keep only samples that carry at least one positive label.
    has_label = labels.sum(axis=1) != 0
    return features[has_label][:max_samples], labels[has_label][:max_samples]


nus_train_x, nus_train_y = _features_and_labels(nus_train['data'])
nus_test_x, nus_test_y = _features_and_labels(nus_test['data'])

# torch.Tensor(...) yields float32 tensors from the float64 arrays.
train_dataset = TensorDataset(torch.Tensor(nus_train_x), torch.Tensor(nus_train_y))
test_dataset = TensorDataset(torch.Tensor(nus_test_x), torch.Tensor(nus_test_y))

In [164]:
# Scratch arithmetic: one quarter of 2**10 (the per-split sample cap used when
# building the datasets); the value is only displayed, not stored.
(2**10)/4

256.0

In [169]:
# Training configs.
batch_size = 54
num_epochs = 1000
lr = 0.0001
# Prefer the GPU but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fix the RNG so weight initialisation and batch shuffling are repeatable
# across "Restart Kernel -> Run All".
seed = 0
torch.manual_seed(seed)

# Shuffle the training set every epoch; without it every epoch sees the exact
# same batches in the exact same order. Evaluation order stays deterministic.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# NUS-WIDE config.
feat_dim = 128      # number of input feature columns
latent_dim = 10     # size of the shared latent space
num_labels = 81     # number of label columns
h_dim = 50          # hidden width passed to Fe and Fd
fx_h_dim = 100      # hidden width(s) passed to Fx

# NUS-WIDE models: feature encoder, label encoder and label decoder.
Fx_nus = Fx(feat_dim, fx_h_dim, fx_h_dim, latent_dim)
Fe_nus = Fe(num_labels, h_dim, latent_dim)
Fd_nus = Fd(latent_dim, h_dim, num_labels, fin_act=torch.sigmoid)

# Initializing net.
net = C2AE(Fx_nus, Fe_nus, Fd_nus, alpha=5, emb_lambda=0.001, latent_dim=latent_dim, device=device)
net = net.to(device)


# Doing weight_decay here is equivalent to adding an L2 penalty on the weights
# (disabled here: weight_decay=0.0).
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=0.0)
# TensorBoard writer; `comment` is appended to the run directory name.
writer = SummaryWriter(comment='nus-small')

In [170]:
def micro_r(y_t, y_p):
    """Micro-averaged recall of predictions `y_p` against ground truth `y_t`."""
    score = recall_score(y_true=y_t, y_pred=y_p, average='micro')
    return score
def macro_r(y_t, y_p):
    """Macro-averaged recall of predictions `y_p` against ground truth `y_t`."""
    score = recall_score(y_true=y_t, y_pred=y_p, average='macro')
    return score
def micro_p(y_t, y_p):
    """Micro-averaged precision of predictions `y_p` against ground truth `y_t`."""
    score = precision_score(y_true=y_t, y_pred=y_p, average='micro')
    return score
def macro_p(y_t, y_p):
    """Macro-averaged precision of predictions `y_p` against ground truth `y_t`."""
    score = precision_score(y_true=y_t, y_pred=y_p, average='macro')
    return score
def micro_f1(y_t, y_p):
    """Micro-averaged F1 score of predictions `y_p` against ground truth `y_t`."""
    score = f1_score(y_true=y_t, y_pred=y_p, average='micro')
    return score
def macro_f1(y_t, y_p):
    """Macro-averaged F1 score of predictions `y_p` against ground truth `y_t`."""
    score = f1_score(y_true=y_t, y_pred=y_p, average='macro')
    return score
def ham_los(*args, **kwargs):
    """Short-named pass-through to `hamming_loss`; all arguments are forwarded unchanged."""
    score = hamming_loss(*args, **kwargs)
    return score

In [171]:
# Train the C2AE network, evaluating on the test split after every epoch and
# logging losses/metrics to TensorBoard. A checkpoint is written every 100 epochs.
print("Starting training!")
# np.inf instead of the alias np.infty, which was removed in NumPy 2.0.
best_loss = np.inf
# num_epochs + 1 iterations so that the final epoch index (== num_epochs)
# also hits the `epoch % 100 == 0` checkpoint/print below.
for epoch in range(num_epochs+1):
    # ---- Training pass ----
    net.train()
    # Trackers accumulate the per-batch (batch-size-normalised) losses, i.e.
    # they are sums over batches, not epoch means.
    loss_tracker = 0.0
    latent_loss_tracker = 0.0
    cor_loss_tracker = 0.0
    for x, y in train_dataloader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()

        # Pass x, y to network. Retrieve both encodings, and decoding of ys encoding.
        fx_x, fe_y, fd_z = net(x, y)
        # Calc loss.
        l_loss, c_loss = net.losses(fx_x, fe_y, fd_z, y)
        # Normalize losses by batch.
        l_loss /= x.shape[0]
        c_loss /= x.shape[0]
        loss = l_loss + net.alpha*c_loss
        loss.backward()
        optimizer.step()

        loss_tracker += loss.item()
        latent_loss_tracker += l_loss.item()
        cor_loss_tracker += c_loss.item()
    writer.add_scalar('train/loss', loss_tracker, epoch)
    writer.add_scalar('train/latent_loss', latent_loss_tracker, epoch)
    writer.add_scalar('train/corr_loss', cor_loss_tracker, epoch)

    # ---- Evaluation pass ----
    net.eval()
    loss_tracker = 0.0
    latent_loss_tracker = 0.0
    cor_loss_tracker = 0.0
    # Sum of per-batch subset accuracies; currently not logged (see the
    # disabled 'val/acc' scalar below).
    acc_track = 0.0
    # No gradients are needed for evaluation; disabling autograd avoids
    # building (and holding memory for) a graph on every test batch.
    with torch.no_grad():
        for x, y in test_dataloader:
            x, y = x.to(device), y.to(device)
            # Prediction only requires x, as it is just Fd(Fx(x)); Fe(y) is
            # computed solely to evaluate the latent loss.
            fx_x, fe_y = net.Fx(x), net.Fe(y)
            fd_z = net.Fd(fx_x)

            l_loss, c_loss = net.losses(fx_x, fe_y, fd_z, y)
            # Normalize losses by batch.
            l_loss /= x.shape[0]
            c_loss /= x.shape[0]
            loss = l_loss + net.alpha*c_loss

            latent_loss_tracker += l_loss.item()
            cor_loss_tracker += c_loss.item()
            loss_tracker += loss.item()
            # Reuse fd_z (== Fd(Fx(x))) rather than running a second,
            # identical forward pass just to obtain the predictions.
            lab_preds = torch.round(fd_z).cpu().numpy()
            acc_track += accuracy_score(y.cpu().numpy(), lab_preds)

    # Best-checkpoint saving is currently disabled:
#     if loss_tracker < best_loss:
#         best_loss = loss_tracker
#         print("Saving model.")
#         torch.save(net.state_dict(), f'./models/nus_best/best.pt')
    if epoch % 100 == 0:
        torch.save(net.state_dict(), f'./models/nus_best/best_{epoch}.pt')
        print(f"Epoch: {epoch}, Loss: {loss_tracker},  L-Loss: {latent_loss_tracker}, C-Loss: {cor_loss_tracker}")
    # Datasets are passed as [test, train]; the result appears to be keyed by
    # list position, so 'dataset_0' is the test split and 'dataset_1' the
    # train split. Reuse the configured `device` instead of hard-coding CUDA.
    mets = eval_metrics(net, [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r],
                        [test_dataset, train_dataset], device)

    # Train-split metrics (second dataset in the list).
    for k, v in mets['dataset_1'].items():
        writer.add_scalar(f'train/{k}', v, epoch)

    # Validation (test-split) metrics (first dataset in the list).
    for k, v in mets['dataset_0'].items():
        writer.add_scalar(f'val/{k}', v, epoch)

    writer.add_scalar('val/loss', loss_tracker, epoch)
    writer.add_scalar('val/latent_loss', latent_loss_tracker, epoch)
    writer.add_scalar('val/corr_loss', cor_loss_tracker, epoch)
#     writer.add_scalar('val/acc', acc_track, epoch)
print(eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], device))

Starting training!
Epoch: 0, Loss: 96.51275873184204,  L-Loss: 1.008283395320177, C-Loss: 19.100895047187805
Epoch: 100, Loss: 58.89303755760193,  L-Loss: 1.3860370218753815, C-Loss: 11.501400113105774
Epoch: 200, Loss: 57.99732780456543,  L-Loss: 2.07010905072093, C-Loss: 11.185443818569183
Epoch: 300, Loss: 58.92934536933899,  L-Loss: 3.254526101052761, C-Loss: 11.134963899850845
Epoch: 400, Loss: 61.09421181678772,  L-Loss: 5.580952599644661, C-Loss: 11.102651804685593
Epoch: 500, Loss: 62.69799470901489,  L-Loss: 7.070828527212143, C-Loss: 11.125433325767517
Epoch: 600, Loss: 62.806777000427246,  L-Loss: 7.005776256322861, C-Loss: 11.160200208425522
Epoch: 700, Loss: 62.80078625679016,  L-Loss: 6.675743967294693, C-Loss: 11.225008308887482
Epoch: 800, Loss: 63.28416895866394,  L-Loss: 6.414846122264862, C-Loss: 11.373864531517029
Epoch: 900, Loss: 63.1254026889801,  L-Loss: 6.2795035392045975, C-Loss: 11.369179874658585
Epoch: 1000, Loss: 63.5850510597229,  L-Loss: 6.03493979573249

In [168]:
# Evaluate the current `net` with the full metric set on both splits.
# Datasets are passed as [test, train], so presumably 'dataset_0' in the
# returned dict is the test split and 'dataset_1' the train split -- TODO confirm
# against eval_metrics in the C2AE module.
eval_metrics(net, [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r], 
                        [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'ham_los': 0.13558545524691357,
  'accuracy_score': 0.0,
  'micro_f1': 0.21290593505039193,
  'micro_p': 0.123779296875,
  'micro_r': 0.7605,
  'macro_f1': 0.029859431562645266,
  'macro_p': 0.018337673611111112,
  'macro_r': 0.14814814814814814},
 'dataset_1': {'ham_los': 0.13442804783950618,
  'accuracy_score': 0.0,
  'micro_f1': 0.21123372948500285,
  'micro_p': 0.12150065104166667,
  'micro_r': 0.8079004329004329,
  'macro_f1': 0.029287285999225632,
  'macro_p': 0.018000096450617283,
  'macro_r': 0.14814814814814814}}

In [153]:
# Hamming loss and subset accuracy of the current `net`; datasets are passed
# as [test, train] (presumably 'dataset_0' = test, 'dataset_1' = train).
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.07098765432098765, 'accuracy_score': 0.0},
 'dataset_1': {'hamming_loss': 0.06867283950617284, 'accuracy_score': 0.0}}

# Target: at least 0.75 accuracy and at most 0.003 Hamming loss on the validation set

In [131]:
# Hamming loss and subset accuracy of the current `net`; datasets are passed
# as [test, train] (presumably 'dataset_0' = test, 'dataset_1' = train).
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.0030864197530864196, 'accuracy_score': 0.75},
 'dataset_1': {'hamming_loss': 0.0, 'accuracy_score': 1.0}}

In [134]:
# Hamming loss and subset accuracy of the current `net`; datasets are passed
# as [test, train] (presumably 'dataset_0' = test, 'dataset_1' = train).
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.0030864197530864196, 'accuracy_score': 0.75},
 'dataset_1': {'hamming_loss': 0.0, 'accuracy_score': 1.0}}

In [28]:
# Hamming loss and subset accuracy of the current `net`; datasets are passed
# as [test, train] (presumably 'dataset_0' = test, 'dataset_1' = train).
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.07175925925925926, 'accuracy_score': 0.0},
 'dataset_1': {'hamming_loss': 0.06867283950617284, 'accuracy_score': 0.0}}

In [39]:
# Hamming loss and subset accuracy of the current `net`; datasets are passed
# as [test, train] (presumably 'dataset_0' = test, 'dataset_1' = train).
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.07175925925925926, 'accuracy_score': 0.0},
 'dataset_1': {'hamming_loss': 0.06867283950617284, 'accuracy_score': 0.0}}

In [49]:
# Hamming loss and subset accuracy of the current `net`; datasets are passed
# as [test, train] (presumably 'dataset_0' = test, 'dataset_1' = train).
eval_metrics(net, [hamming_loss, accuracy_score], [test_dataset, train_dataset], torch.device('cuda'))

{'dataset_0': {'hamming_loss': 0.07175925925925926, 'accuracy_score': 0.0},
 'dataset_1': {'hamming_loss': 0.06867283950617284, 'accuracy_score': 0.0}}