In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import hamming_loss, accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from skmultilearn.dataset import load_dataset
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from C2AE import save_model, eval_metrics, load_model

In [2]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines
# without CUDA instead of failing on the first tensor allocation.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Each call yields (features, labels, feature names, label names) for the requested split.
train_x, train_y, feat_names, label_names = load_dataset('mediamill', 'train')
test_x, test_y, _, _ = load_dataset('mediamill', 'test')


def _to_tensor_dataset(features, labels):
    """Densify a (features, labels) pair and wrap it as float tensors placed on `device`.

    Both arguments must expose `.todense()`.
    """
    return TensorDataset(
        torch.tensor(features.todense(), device=device, dtype=torch.float),
        torch.tensor(labels.todense(), device=device, dtype=torch.float),
    )


# The full splits are materialised on `device` once, up front.
train_dataset = _to_tensor_dataset(train_x, train_y)
test_dataset = _to_tensor_dataset(test_x, test_y)

mediamill:train - exists, not redownloading
mediamill:test - exists, not redownloading


# Defining metrics.

In [3]:
def micro_r(y_t, y_p):
    """Recall of predictions `y_p` against targets `y_t`, using micro averaging.

    Both arguments are forwarded unchanged to sklearn's `recall_score`.
    """
    score = recall_score(y_t, y_p, average='micro')
    return score
def macro_r(y_t, y_p):
    """Recall of predictions `y_p` against targets `y_t`, using macro averaging.

    Both arguments are forwarded unchanged to sklearn's `recall_score`.
    """
    score = recall_score(y_t, y_p, average='macro')
    return score
def micro_p(y_t, y_p):
    """Precision of predictions `y_p` against targets `y_t`, using micro averaging.

    Both arguments are forwarded unchanged to sklearn's `precision_score`.
    """
    score = precision_score(y_t, y_p, average='micro')
    return score
def macro_p(y_t, y_p):
    """Precision of predictions `y_p` against targets `y_t`, using macro averaging.

    Both arguments are forwarded unchanged to sklearn's `precision_score`.
    """
    score = precision_score(y_t, y_p, average='macro')
    return score
def micro_f1(y_t, y_p):
    """F1 score of predictions `y_p` against targets `y_t`, using micro averaging.

    Both arguments are forwarded unchanged to sklearn's `f1_score`.
    """
    score = f1_score(y_t, y_p, average='micro')
    return score
def macro_f1(y_t, y_p):
    """F1 score of predictions `y_p` against targets `y_t`, using macro averaging.

    Both arguments are forwarded unchanged to sklearn's `f1_score`.
    """
    score = f1_score(y_t, y_p, average='macro')
    return score
def ham_los(*args, **kwargs):
    """Pass-through to sklearn's `hamming_loss`; every argument is forwarded as given.

    NOTE(review): presumably this wrapper exists so the metric is reported under the
    short name 'ham_los' (that key appears in the eval_metrics output) - confirm
    against C2AE.eval_metrics.
    """
    loss_value = hamming_loss(*args, **kwargs)
    return loss_value

# Inspecting dataset shapes

In [4]:
# Shapes of the feature/label matrices for both splits, in the order
# (train_x, train_y, test_x, test_y).
tuple(split.shape for split in (train_x, train_y, test_x, test_y))

((30993, 120), (30993, 101), (12914, 120), (12914, 101))

In [5]:
# Total number of samples across the train and test splits.
sum(split.shape[0] for split in (train_x, test_x))

43907

In [8]:
class MediamillModel(torch.nn.Module):
    """Single fully connected layer mapping a feature vector to one raw logit per label.

    No activation is applied in `forward`; the output is meant to be consumed as logits
    (the notebook trains it with `BCEWithLogitsLoss`).

    Args:
        in_features: Size of each input feature vector. Defaults to 120, the feature
            width of the mediamill data used in this notebook.
        out_features: Number of output logits. Defaults to 101, the label count of
            the mediamill data used in this notebook.
    """

    def __init__(self, in_features: int = 120, out_features: int = 101):
        super().__init__()
        # The attribute name `fc1` is kept so existing checkpoints (keys `fc1.weight`,
        # `fc1.bias`) still load.
        self.fc1 = torch.nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the logits `fc1(x)`; the last dimension of `x` must equal `in_features`."""
        return self.fc1(x)

In [14]:
# NOTE: `num_epochs` is not read by the training loop below, which hard-codes its own
# epoch count.
num_epochs = 1000
lr = 0.001
batch_size=256

net = MediamillModel().to(device)
# TensorBoard writer; `comment` is appended to the default run directory name.
writer = SummaryWriter(comment='mediamill_fc')
# BCEWithLogitsLoss applies the sigmoid itself, so the model is expected to emit raw logits.
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation does not need a random order. A fixed order keeps the per-batch-averaged
# test loss reproducible, because the (smaller) last batch always holds the same samples.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [20]:
train_loss = []
test_loss = []

# A checkpoint is written after every epoch. Create the target directory up front so
# the first `torch.save` cannot fail after a full epoch of training.
checkpoint_dir = './models/mediamill/fc'
os.makedirs(checkpoint_dir, exist_ok=True)

# Metric callables handed to eval_metrics on every evaluation step.
metric_fns = [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r]

# NOTE: the epoch count is hard-coded here (`num_epochs` is not used). The evaluation
# cell further down loads checkpoint 5000.pt, so keep the two in sync.
for epoch in range(0, 5000+1):
    # --- one optimisation pass over the training set ---
    net.train()
    loss_tracker = 0.0
    for x, y in train_dataloader:
        optimizer.zero_grad()
        preds = net(x)
        loss = criterion(preds, y)
        loss.backward()
        optimizer.step()
        loss_tracker += loss.item()
    # Mean of the per-batch losses for this epoch.
    epoch_train_loss = loss_tracker / len(train_dataloader)
    train_loss.append(epoch_train_loss)
    writer.add_scalar('train/loss', epoch_train_loss, epoch)

    # --- every 5th epoch: held-out loss plus the full metric suite ---
    if epoch % 5 == 0:
        net.eval()
        test_tracker = 0.0
        # No gradients are needed for evaluation; skipping autograd bookkeeping saves
        # memory and time.
        with torch.no_grad():
            for x, y in test_dataloader:
                preds = net(x)
                loss = criterion(preds, y)
                test_tracker += loss.item()
        epoch_test_loss = test_tracker / len(test_dataloader)
        test_loss.append(epoch_test_loss)
        writer.add_scalar('val/loss', epoch_test_loss, epoch)

        # Log all the good metrics to the board.
        # Datasets are passed as [test, train], so this code reads 'dataset_0' as the
        # test results and 'dataset_1' as the train results.
        mets = eval_metrics(net, metric_fns, [test_dataset, train_dataset], device, apply_sig=True)
        # Train
        for k, v in mets['dataset_1'].items():
            writer.add_scalar(f'train/{k}', v, epoch)
        # Validation (test split)
        for k, v in mets['dataset_0'].items():
            writer.add_scalar(f'val/{k}', v, epoch)
        print("Epoch: {}, Train loss: {}, Test loss: {}".format(epoch, train_loss[-1], test_loss[-1]))
    torch.save(net.state_dict(), f'{checkpoint_dir}/{epoch}.pt')

Epoch: 4000, Train loss: 0.08613247754143886, Test loss: 0.09163471033760145
Epoch: 4005, Train loss: 0.08620843718774983, Test loss: 0.091657734969083
Epoch: 4010, Train loss: 0.08614608046949887, Test loss: 0.09172556198695127
Epoch: 4015, Train loss: 0.08625598538850175, Test loss: 0.09170330082084618
Epoch: 4020, Train loss: 0.08612408409597444, Test loss: 0.09185854681566649
Epoch: 4025, Train loss: 0.08617285584084323, Test loss: 0.09190288595124788
Epoch: 4030, Train loss: 0.08611986883839623, Test loss: 0.09186678730389651
Epoch: 4035, Train loss: 0.0860568668510093, Test loss: 0.09190276265144348
Epoch: 4040, Train loss: 0.08621608239949727, Test loss: 0.09193398204504274
Epoch: 4045, Train loss: 0.08611884255145417, Test loss: 0.0919204019740516
Epoch: 4050, Train loss: 0.08608640382280115, Test loss: 0.09185086687405904
Epoch: 4055, Train loss: 0.08604107162014382, Test loss: 0.09175187729152978
Epoch: 4060, Train loss: 0.08597204113592867, Test loss: 0.09188787478442285
Epo

### Picking best model

In [22]:
# Restore the checkpoint written after epoch 5000 and move the model to `device`.
final_checkpoint = './models/mediamill/fc/5000.pt'
restored_net = load_model(MediamillModel, final_checkpoint)
eval_net = restored_net.to(device)

In [23]:
# Score the restored model on both splits. Datasets are passed as [test, train]; the
# result dictionary below reports them under 'dataset_0' and 'dataset_1'.
final_metric_fns = [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r]
final_eval_sets = [test_dataset, train_dataset]
mets = eval_metrics(eval_net, final_metric_fns, final_eval_sets, device, apply_sig=True)
mets

  _warn_prf(average, modifier, msg_start, len(result))


{'dataset_0': {'ham_los': 0.030890567762057296,
  'accuracy_score': 0.08990243146972278,
  'micro_f1': 0.5416320633439893,
  'micro_p': 0.7679032258064517,
  'micro_r': 0.4183582010860969,
  'macro_f1': 0.06557044295932339,
  'macro_p': 0.2319994582527386,
  'macro_r': 0.05073817881601729},
 'dataset_1': {'ham_los': 0.02998569143527459,
  'accuracy_score': 0.08437389087858549,
  'micro_f1': 0.5517606945359732,
  'micro_p': 0.7787003464125409,
  'micro_r': 0.4272465740254554,
  'macro_f1': 0.08180357845370219,
  'macro_p': 0.3220003934810215,
  'macro_r': 0.06097828672199412}}