In [31]:
import os

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn.functional as F
import torchvision as tv
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import hamming_loss, accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from skmultilearn.dataset import load_dataset

from C2AE import save_model, eval_metrics, load_model

In [22]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing on the first tensor allocation.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Each call is unpacked as (features, labels, feature names, label names);
# the name lists of the test split are not needed.
train_x, train_y, feat_names, label_names = load_dataset('scene', 'train')
test_x, test_y, _, _ = load_dataset('scene', 'test')


def _as_tensor_dataset(features, labels):
    """Densify a (features, labels) pair and wrap it in a float TensorDataset on ``device``.

    Both arguments must expose ``.todense()``.
    """
    return TensorDataset(
        torch.tensor(features.todense(), device=device, dtype=torch.float),
        torch.tensor(labels.todense(), device=device, dtype=torch.float),
    )


train_dataset = _as_tensor_dataset(train_x, train_y)
test_dataset = _as_tensor_dataset(test_x, test_y)

scene:train - exists, not redownloading
scene:test - exists, not redownloading


# Defining metrics.

In [23]:
def micro_r(y_t, y_p):
    """Micro-averaged recall of predictions ``y_p`` against ground truth ``y_t``.

    ``zero_division=0`` yields the same score scikit-learn falls back to for an
    undefined recall, but without raising ``UndefinedMetricWarning`` on each call.
    """
    return recall_score(y_t, y_p, average='micro', zero_division=0)
def macro_r(y_t, y_p):
    """Macro-averaged recall of predictions ``y_p`` against ground truth ``y_t``.

    ``zero_division=0`` yields the same score scikit-learn falls back to for an
    undefined recall, but without raising ``UndefinedMetricWarning`` on each call.
    """
    return recall_score(y_t, y_p, average='macro', zero_division=0)
def micro_p(y_t, y_p):
    """Micro-averaged precision of predictions ``y_p`` against ground truth ``y_t``.

    ``zero_division=0`` yields the same score scikit-learn falls back to when
    nothing is predicted positive, but without raising ``UndefinedMetricWarning``.
    """
    return precision_score(y_t, y_p, average='micro', zero_division=0)
def macro_p(y_t, y_p):
    """Macro-averaged precision of predictions ``y_p`` against ground truth ``y_t``.

    ``zero_division=0`` yields the same score scikit-learn falls back to when a
    label is never predicted, but without raising ``UndefinedMetricWarning``.
    """
    return precision_score(y_t, y_p, average='macro', zero_division=0)
def micro_f1(y_t, y_p):
    """Micro-averaged F1 score of predictions ``y_p`` against ground truth ``y_t``.

    ``zero_division=0`` yields the same score scikit-learn falls back to for an
    undefined F1, but without raising ``UndefinedMetricWarning`` on each call.
    """
    return f1_score(y_t, y_p, average='micro', zero_division=0)
def macro_f1(y_t, y_p):
    """Macro-averaged F1 score of predictions ``y_p`` against ground truth ``y_t``.

    ``zero_division=0`` yields the same score scikit-learn falls back to for an
    undefined per-label F1, but without raising ``UndefinedMetricWarning``.
    """
    return f1_score(y_t, y_p, average='macro', zero_division=0)
def ham_los(*args, **kwargs):
    """Thin alias for ``hamming_loss``: every argument is forwarded unchanged."""
    loss = hamming_loss(*args, **kwargs)
    return loss

In [25]:
class SceneModel(torch.nn.Module):
    """Single fully connected layer that maps a feature vector to per-label logits.

    Args:
        in_features: Length of each input feature vector (default 294).
        out_features: Number of logits produced per sample (default 6).
        dropout_p: Drop probability of the ``dropout`` module (default 0.3).
    """

    def __init__(self, in_features=294, out_features=6, dropout_p=.3):
        super().__init__()
        # Registered but not applied in forward(); kept so the module's
        # attributes stay the same for any code that inspects them.
        self.dropout = torch.nn.Dropout(dropout_p)
        self.fc1 = torch.nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the raw output of ``fc1`` for ``x``; no activation is applied."""
        return self.fc1(x)

##### Training setup: hyperparameters, model, loss, optimizer, and DataLoaders

In [28]:
# Hyperparameters.
num_epochs = 1000
lr = 0.0001
batch_size = 4

net = SceneModel().to(device)
writer = SummaryWriter(comment='scene_fc')
# The network outputs raw logits, so the sigmoid is folded into the loss.
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)

# Reshuffle the training samples every epoch so mini-batches are not replayed
# in the same fixed order; the evaluation loader keeps a deterministic order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [29]:
train_loss = []
test_loss = []

# A checkpoint is written after every epoch; create the target directory up
# front so the first torch.save cannot fail after a full epoch of training.
os.makedirs('./models/scene/scene_fc', exist_ok=True)

# range(num_epochs + 1) so that epoch index `num_epochs` itself is also
# trained, evaluated (it is a multiple of 5 for the default 1000) and saved.
for epoch in range(num_epochs+1):

    # ---- Training pass ----
    net.train()
    loss_tracker = 0.0
    for x, y in train_dataloader:
        optimizer.zero_grad()
        preds = net(x)
        loss = criterion(preds, y)
        loss.backward()
        optimizer.step()
        loss_tracker += loss.item()
    # Mean of the per-batch losses for this epoch.
    epoch_train_loss = loss_tracker / len(train_dataloader)
    train_loss.append(epoch_train_loss)
    writer.add_scalar('train/loss', epoch_train_loss, epoch)

    # ---- Periodic evaluation (every 5th epoch) ----
    if epoch % 5 == 0:
        net.eval()
        test_tracker = 0.0
        # No gradients are needed for evaluation; skipping autograd bookkeeping
        # saves memory and time without changing the loss values.
        with torch.no_grad():
            for x, y in test_dataloader:
                preds = net(x)
                loss = criterion(preds, y)
                test_tracker += loss.item()
        epoch_test_loss = test_tracker / len(test_dataloader)
        test_loss.append(epoch_test_loss)
        writer.add_scalar('val/loss', epoch_test_loss, epoch)

        # Log all the good metrics to the board.
        mets = eval_metrics(net, [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r],
                                [test_dataset, train_dataset], device, apply_sig=True)
        # Train: 'dataset_1' presumably corresponds to the second list entry
        # (train_dataset) -- confirm against eval_metrics.
        for k, v in mets['dataset_1'].items():
            writer.add_scalar(f'train/{k}', v, epoch)
        # Validation: 'dataset_0' presumably corresponds to test_dataset.
        for k, v in mets['dataset_0'].items():
            writer.add_scalar(f'val/{k}', v, epoch)
        print("Epoch: {}, Train loss: {}, Test loss: {}".format(epoch, train_loss[-1], test_loss[-1]))
    torch.save(net.state_dict(), f'./models/scene/scene_fc/v2{epoch}.pt')

# Push any buffered scalars to disk so TensorBoard shows the complete run.
writer.flush()

  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 0, Train loss: 0.5828638191860501, Test loss: 0.4924984229448248
Epoch: 5, Train loss: 0.443063610752817, Test loss: 0.4396788216753548
Epoch: 10, Train loss: 0.41086887133003464, Test loss: 0.41074084876771755
Epoch: 15, Train loss: 0.38585507840213207, Test loss: 0.38832199802765477
Epoch: 20, Train loss: 0.36649243499186174, Test loss: 0.37092867284315484
Epoch: 25, Train loss: 0.3511511689857288, Test loss: 0.3571435017729284
Epoch: 30, Train loss: 0.33869536061196437, Test loss: 0.34598038532462805
Epoch: 35, Train loss: 0.3283617121502511, Test loss: 0.3367685298855887
Epoch: 40, Train loss: 0.31962910343711526, Test loss: 0.3290434890865881
Epoch: 45, Train loss: 0.3121326739835267, Test loss: 0.32247567630332447
Epoch: 50, Train loss: 0.30561053327502985, Test loss: 0.3168256175159212
Epoch: 55, Train loss: 0.2998703173038983, Test loss: 0.3119155426188855
Epoch: 60, Train loss: 0.2947677664040732, Test loss: 0.3076109081805749
Epoch: 65, Train loss: 0.2901926125236864, 

### Picking best model

In [33]:
# NOTE(review): the training loop saves checkpoints as 'v2{epoch}.pt', while this
# cell reads '1000.pt' -- confirm that this file exists and is the intended model.
eval_net = load_model(SceneModel, './models/scene/scene_fc/1000.pt')
eval_net = eval_net.to(device)

In [34]:
# Score the reloaded model on both splits with the full metric suite.
metric_fns = [ham_los, accuracy_score, micro_f1, micro_p, micro_r, macro_f1, macro_p, macro_r]
# Test split first, train split second; the result keys 'dataset_0' and
# 'dataset_1' presumably follow this order -- confirm against eval_metrics.
eval_sets = [test_dataset, train_dataset]
mets = eval_metrics(eval_net, metric_fns, eval_sets, device, apply_sig=True)
mets

{'dataset_0': {'ham_los': 0.10632664437012262,
  'accuracy_score': 0.5008361204013378,
  'micro_f1': 0.6771053745239102,
  'micro_p': 0.7518796992481203,
  'micro_r': 0.6158583525789069,
  'macro_f1': 0.6846430359193443,
  'macro_p': 0.7655451158990486,
  'macro_r': 0.6203722514298878},
 'dataset_1': {'ham_los': 0.07280484448114506,
  'accuracy_score': 0.6374896779521056,
  'micro_f1': 0.7783829074151655,
  'micro_p': 0.8437783832879201,
  'micro_r': 0.7223950233281493,
  'macro_f1': 0.7878239730377405,
  'macro_p': 0.8501783860270625,
  'macro_r': 0.7372497400277535}}