In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime

import numpy as np

from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from keras.utils.np_utils import to_categorical

import matplotlib.pyplot as plt

from tensorflow.keras.callbacks import TensorBoard

import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F
import torch.nn as nn

Using TensorFlow backend.


In [3]:
from pytorch import torch_models
from data import data

In [4]:
IMG_LEN = 1024
TXT_LEN = 300
N_CLASSES = 50

In [5]:
x_img, x_txt, y = data.get_unpacked_data()

In [6]:
x_img_train, x_img_test, x_txt_train, x_txt_test, y_train, y_test = train_test_split(
    x_img, 
    x_txt, 
    y, 
    test_size=0.2, 
    random_state=42,
    stratify=y
)

x_img_train, x_img_val, x_txt_train, x_txt_val, y_train, y_val = train_test_split(
    x_img_train,
    x_txt_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train
)

img_sscaler = StandardScaler()
img_sscaler.fit(x_img_train)

x_img_train = img_sscaler.transform(x_img_train)
x_img_val = img_sscaler.transform(x_img_val)
x_img_test = img_sscaler.transform(x_img_test)

txt_sscaler = StandardScaler()
txt_sscaler.fit(x_txt_train)

x_txt_train = txt_sscaler.transform(x_txt_train)
x_txt_val = txt_sscaler.transform(x_txt_val)
x_txt_test = txt_sscaler.transform(x_txt_test)

In [7]:
BATCH_SIZE = 512

x_img_train_t = torch.tensor(x_img_train)
x_img_val_t = torch.tensor(x_img_val)
x_img_test_t = torch.tensor(x_img_test)

x_txt_train_t = torch.tensor(x_txt_train)
x_txt_val_t = torch.tensor(x_txt_val)
x_txt_test_t = torch.tensor(x_txt_test)

y_train_t = torch.tensor(y_train)
y_val_t = torch.tensor(y_val)
y_test_t = torch.tensor(y_test)
    
train_ds = TensorDataset(x_img_train_t, x_txt_train_t, y_train_t)
val_ds = TensorDataset(x_img_val_t, x_txt_val_t, y_val_t)
test_ds = TensorDataset(x_img_test_t, x_txt_test_t, y_test_t)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

In [8]:
model_attention_v1 = torch_models.SelfAttentionModel1()
optimizer = optim.Adam(model_attention_v1.parameters(), lr=1e-3)
writer = SummaryWriter('runs/self_attention_v1_bs512_rs42')

In [9]:
EPOCHS = 60

for epoch in range(EPOCHS):
    loss_sum = 0.0
    loss_count = 0
        
    for x_img_cur, x_txt_cur, y_cur in train_loader:
        model_attention_v1.zero_grad()
        output = model_attention_v1(x_img_cur.view(-1, IMG_LEN).float(), x_txt_cur.view(-1, TXT_LEN).float())
        loss = F.nll_loss(output, torch.argmax(y_cur, dim=1))
        loss.backward()
            
        loss_sum += loss
        loss_count += 1
            
        optimizer.step()

    print('epoch:', epoch, 'train_loss:', loss, 'average train loss', loss_sum/loss_count)
    writer.add_scalar('train_loss', loss, epoch)
    writer.add_scalar('avg_train_loss', loss_sum/loss_count, epoch)

    correct = 0
    total = 0
    loss_sum = 0.0
    loss_count = 0
        
    with torch.no_grad():
        for x_img_cur, x_txt_cur, y_cur in val_loader:
            output = model_attention_v1(x_img_cur.view(-1, IMG_LEN).float(), x_txt_cur.view(-1, TXT_LEN).float())
            loss = F.nll_loss(output, torch.argmax(y_cur, dim=1))
            loss_sum += loss
            loss_count += 1
            for idx, i in enumerate(output):
                if torch.argmax(i) == torch.argmax(y_cur, dim=1)[idx]:
                    correct += 1
                total += 1

    print('val_acc:', correct/total, 'val_avg_loss:', loss_sum/loss_count)
    writer.add_scalar('val_acc', correct/total, epoch)
    writer.add_scalar('val_avg_loss', loss_sum/loss_count, epoch)

epoch: 0 train_loss: tensor(3.3161, grad_fn=<NllLossBackward>) average train loss tensor(3.5828, grad_fn=<DivBackward0>)
val_acc: 0.10063281824871229 val_avg_loss: tensor(3.4450)
epoch: 1 train_loss: tensor(3.1592, grad_fn=<NllLossBackward>) average train loss tensor(3.3537, grad_fn=<DivBackward0>)
val_acc: 0.10990434142752023 val_avg_loss: tensor(3.3107)
epoch: 2 train_loss: tensor(3.3190, grad_fn=<NllLossBackward>) average train loss tensor(3.2236, grad_fn=<DivBackward0>)
val_acc: 0.1325091979396615 val_avg_loss: tensor(3.1952)
epoch: 3 train_loss: tensor(3.1255, grad_fn=<NllLossBackward>) average train loss tensor(3.1339, grad_fn=<DivBackward0>)
val_acc: 0.16176600441501104 val_avg_loss: tensor(3.0839)
epoch: 4 train_loss: tensor(3.0006, grad_fn=<NllLossBackward>) average train loss tensor(3.0035, grad_fn=<DivBackward0>)
val_acc: 0.18881530537159677 val_avg_loss: tensor(2.9689)
epoch: 5 train_loss: tensor(2.9385, grad_fn=<NllLossBackward>) average train loss tensor(2.8537, grad_fn=<

epoch: 46 train_loss: tensor(3.2198, grad_fn=<NllLossBackward>) average train loss tensor(2.1446, grad_fn=<DivBackward0>)
val_acc: 0.3640618101545254 val_avg_loss: tensor(2.3183)
epoch: 47 train_loss: tensor(2.3505, grad_fn=<NllLossBackward>) average train loss tensor(2.2173, grad_fn=<DivBackward0>)
val_acc: 0.37654157468727006 val_avg_loss: tensor(2.2927)
epoch: 48 train_loss: tensor(2.4119, grad_fn=<NllLossBackward>) average train loss tensor(2.1355, grad_fn=<DivBackward0>)
val_acc: 0.3831346578366446 val_avg_loss: tensor(2.2590)
epoch: 49 train_loss: tensor(2.3217, grad_fn=<NllLossBackward>) average train loss tensor(2.1063, grad_fn=<DivBackward0>)
val_acc: 0.39311258278145694 val_avg_loss: tensor(2.2494)
epoch: 50 train_loss: tensor(2.2029, grad_fn=<NllLossBackward>) average train loss tensor(2.0799, grad_fn=<DivBackward0>)
val_acc: 0.3958204562178072 val_avg_loss: tensor(2.2438)
epoch: 51 train_loss: tensor(4.0609, grad_fn=<NllLossBackward>) average train loss tensor(3.1421, grad_

In [15]:
model_ua_v1 = torch_models.UAModel1()
optimizer = optim.Adam(model_ua_v1.parameters(), lr=1e-3)
writer = SummaryWriter('runs/ua_v1_bs512_rs42')

In [None]:
EPOCHS = 60

for epoch in range(EPOCHS):
    loss_sum = 0.0
    loss_count = 0
        
    for x_img_cur, x_txt_cur, y_cur in train_loader:
        model_ua_v1.zero_grad()
        output = model_ua_v1(x_img_cur.view(-1, IMG_LEN).float(), x_txt_cur.view(-1, TXT_LEN).float())
        loss = F.nll_loss(output, torch.argmax(y_cur, dim=1))
        loss.backward()
            
        loss_sum += loss
        loss_count += 1
            
        optimizer.step()

    print('epoch:', epoch, 'train_loss:', loss, 'average train loss', loss_sum/loss_count)
    writer.add_scalar('train_loss', loss, epoch)
    writer.add_scalar('avg_train_loss', loss_sum/loss_count, epoch)

    correct = 0
    total = 0
    loss_sum = 0.0
    loss_count = 0
        
    with torch.no_grad():
        for x_img_cur, x_txt_cur, y_cur in val_loader:
            output = model_ua_v1(x_img_cur.view(-1, IMG_LEN).float(), x_txt_cur.view(-1, TXT_LEN).float())
            loss = F.nll_loss(output, torch.argmax(y_cur, dim=1))
            loss_sum += loss
            loss_count += 1
            for idx, i in enumerate(output):
                if torch.argmax(i) == torch.argmax(y_cur, dim=1)[idx]:
                    correct += 1
                total += 1

    print('val_acc:', correct/total, 'val_avg_loss:', loss_sum/loss_count)
    writer.add_scalar('val_acc', correct/total, epoch)
    writer.add_scalar('val_avg_loss', loss_sum/loss_count, epoch)

epoch: 0 train_loss: tensor(1.2151, grad_fn=<NllLossBackward>) average train loss tensor(1.6174, grad_fn=<DivBackward0>)
val_acc: 0.5988226637233259 val_avg_loss: tensor(1.4589)
epoch: 1 train_loss: tensor(1.0648, grad_fn=<NllLossBackward>) average train loss tensor(1.3605, grad_fn=<DivBackward0>)
val_acc: 0.6077115526122149 val_avg_loss: tensor(1.4206)
epoch: 2 train_loss: tensor(0.9884, grad_fn=<NllLossBackward>) average train loss tensor(1.2905, grad_fn=<DivBackward0>)
val_acc: 0.6070640176600441 val_avg_loss: tensor(1.4223)
epoch: 3 train_loss: tensor(0.8413, grad_fn=<NllLossBackward>) average train loss tensor(1.2334, grad_fn=<DivBackward0>)
val_acc: 0.6102722590139809 val_avg_loss: tensor(1.4242)
epoch: 4 train_loss: tensor(0.7145, grad_fn=<NllLossBackward>) average train loss tensor(1.1803, grad_fn=<DivBackward0>)
val_acc: 0.6082119205298013 val_avg_loss: tensor(1.4456)
epoch: 5 train_loss: tensor(0.6366, grad_fn=<NllLossBackward>) average train loss tensor(1.1276, grad_fn=<DivB

epoch: 46 train_loss: tensor(0.0725, grad_fn=<NllLossBackward>) average train loss tensor(0.2710, grad_fn=<DivBackward0>)
val_acc: 0.5374834437086092 val_avg_loss: tensor(4.3104)
epoch: 47 train_loss: tensor(0.1542, grad_fn=<NllLossBackward>) average train loss tensor(0.2884, grad_fn=<DivBackward0>)
val_acc: 0.5388962472406181 val_avg_loss: tensor(4.3034)
epoch: 48 train_loss: tensor(0.1144, grad_fn=<NllLossBackward>) average train loss tensor(0.3179, grad_fn=<DivBackward0>)
val_acc: 0.5448123620309051 val_avg_loss: tensor(4.2255)
epoch: 49 train_loss: tensor(0.1527, grad_fn=<NllLossBackward>) average train loss tensor(0.2672, grad_fn=<DivBackward0>)
val_acc: 0.5420456217807211 val_avg_loss: tensor(4.2607)
epoch: 50 train_loss: tensor(0.1054, grad_fn=<NllLossBackward>) average train loss tensor(0.2438, grad_fn=<DivBackward0>)
val_acc: 0.5445768947755703 val_avg_loss: tensor(4.3671)
epoch: 51 train_loss: tensor(0.0852, grad_fn=<NllLossBackward>) average train loss tensor(0.2502, grad_fn