In [1]:
%load_ext autoreload
%autoreload 2

In [67]:
import time
import copy

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F
import torch.nn as nn

import pickle

import numpy as np

In [3]:
IMG_LEN = 1024
TXT_LEN = 300
N_CLASSES = 50

In [4]:
import sys
sys.path.append('./data')

In [5]:
from data import data

Using TensorFlow backend.


In [6]:
x_img, x_txt, y = data.get_unpacked_data()

In [7]:
x_img_train, x_img_test, x_txt_train, x_txt_test, y_train, y_test = train_test_split(
    x_img, 
    x_txt, 
    y, 
    test_size=0.2, 
    random_state=42,
    stratify=y
)

x_img_train, x_img_val, x_txt_train, x_txt_val, y_train, y_val = train_test_split(
    x_img_train,
    x_txt_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train
)

img_sscaler = StandardScaler()
img_sscaler.fit(x_img_train)

x_img_train = img_sscaler.transform(x_img_train)
x_img_val = img_sscaler.transform(x_img_val)
x_img_test = img_sscaler.transform(x_img_test)

txt_sscaler = StandardScaler()
txt_sscaler.fit(x_txt_train)

x_txt_train = txt_sscaler.transform(x_txt_train)
x_txt_val = txt_sscaler.transform(x_txt_val)
x_txt_test = txt_sscaler.transform(x_txt_test)

x_img_train_t = torch.tensor(x_img_train).float()
x_img_val_t = torch.tensor(x_img_val).float()
x_img_test_t = torch.tensor(x_img_test).float()

x_txt_train_t = torch.tensor(x_txt_train).float()
x_txt_val_t = torch.tensor(x_txt_val).float()
x_txt_test_t = torch.tensor(x_txt_test).float()

y_train_t = torch.tensor(y_train).float()
y_val_t = torch.tensor(y_val).float()
y_test_t = torch.tensor(y_test).float()

train_ds = TensorDataset(x_img_train_t, x_txt_train_t, y_train_t)
val_ds = TensorDataset(x_img_val_t, x_txt_val_t, y_val_t)
test_ds = TensorDataset(x_img_test_t, x_txt_test_t, y_test_t)

BATCH_SIZE = 2048

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

## Encoder with one hidden layer

In [8]:
class Encoder(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.fc_img = nn.Linear(IMG_LEN, d)
        self.fc_txt = nn.Linear(TXT_LEN, d)
        
    def forward(self, inp_img, inp_txt):
        x_img = self.fc_img(inp_img)
        x_txt = self.fc_txt(inp_txt)
        
        x = torch.cat((x_img, x_txt), 1)
        x = F.relu(x)
        return x
    
class Decoder(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.fc_img = nn.Linear(2 * d, IMG_LEN)
        self.fc_txt = nn.Linear(2 * d, TXT_LEN)
    
    def forward(self, x):
        x_img = self.fc_img(x)
        x_txt = self.fc_txt(x)
        return x_img, x_txt

class Autoencoder(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.encoder = Encoder(d)
        self.decoder = Decoder(d)
        
    def forward(self, inp_img, inp_txt):
        x = self.encoder(inp_img, inp_txt)
        x_img, x_txt = self.decoder(x)
        return x_img, x_txt

In [51]:
class EncoderConcat(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.fc_img = nn.Linear(IMG_LEN, d)
        self.fc_txt = nn.Linear(TXT_LEN, d)
        
    def forward(self, inp_img, inp_txt):
        x_img = self.fc_img(inp_img)
        x_txt = self.fc_txt(inp_txt)
        
        x = torch.cat((x_img, x_txt), 1)
        return x
    
class DecoderConcat(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.fc_img = nn.Linear(2 * d, IMG_LEN)
        self.fc_txt = nn.Linear(2 * d, TXT_LEN)
    
    def forward(self, x):
        x_img = self.fc_img(x)
        x_txt = self.fc_txt(x)
        return x_img, x_txt

class AutoencoderConcat(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.encoder = EncoderConcat(d)
        self.decoder = DecoderConcat(d)
        
    def forward(self, inp_img, inp_txt):
        x = self.encoder(inp_img, inp_txt)
        x_img, x_txt = self.decoder(x)
        return x_img, x_txt

In [39]:
class EncoderFixed(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.fc_img = nn.Linear(IMG_LEN, d)
        self.fc_txt = nn.Linear(TXT_LEN, d)
        self.fc = nn.Linear(2 * d, 2 * d)
        
    def forward(self, inp_img, inp_txt):
        x_img = self.fc_img(inp_img)
        x_txt = self.fc_txt(inp_txt)
        
        x = self.fc(torch.cat((x_img, x_txt), 1))
        return x
    
class DecoderFixed(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.fc_img = nn.Linear(2 * d, IMG_LEN)
        self.fc_txt = nn.Linear(2 * d, TXT_LEN)
    
    def forward(self, x):
        x_img = self.fc_img(x)
        x_txt = self.fc_txt(x)
        return x_img, x_txt

class AutoencoderFixed(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.encoder = EncoderFixed(d)
        self.decoder = DecoderFixed(d)
        
    def forward(self, inp_img, inp_txt):
        x = self.encoder(inp_img, inp_txt)
        x_img, x_txt = self.decoder(x)
        return x_img, x_txt

In [48]:
class EncoderRelu(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.fc_img = nn.Linear(IMG_LEN, d)
        self.fc_txt = nn.Linear(TXT_LEN, d)
        self.fc = nn.Linear(2 * d, 2 * d)
        
    def forward(self, inp_img, inp_txt):
        x_img = F.relu(self.fc_img(inp_img))
        x_txt = F.relu(self.fc_txt(inp_txt))
        x = F.relu(self.fc(torch.cat((x_img, x_txt), 1)))
        return x
    
class DecoderRelu(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.fc_img = nn.Linear(2 * d, IMG_LEN)
        self.fc_txt = nn.Linear(2 * d, TXT_LEN)
    
    def forward(self, x):
        x_img = self.fc_img(x)
        x_txt = self.fc_txt(x)
        return x_img, x_txt

class AutoencoderRelu(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.encoder = EncoderRelu(d)
        self.decoder = DecoderRelu(d)
        
    def forward(self, inp_img, inp_txt):
        x = self.encoder(inp_img, inp_txt)
        x_img, x_txt = self.decoder(x)
        return x_img, x_txt

In [58]:
BATCH_SIZE = 2048
criterion = nn.MSELoss()

def fit_autoencoder(autoencoder, optimizer, epochs, X_train, X_val, verbose=1):
    
    x_img_train, x_txt_train = X_train[0], X_train[1]
    x_img_val, x_txt_val = X_val[0], X_val[1]
    
    x_img_train_t = torch.tensor(x_img_train).float()
    x_img_val_t = torch.tensor(x_img_val).float()

    x_txt_train_t = torch.tensor(x_txt_train).float()
    x_txt_val_t = torch.tensor(x_txt_val).float()
    
    train_ds = TensorDataset(x_img_train_t, x_txt_train_t)
    val_ds = TensorDataset(x_img_val_t, x_txt_val_t)
    
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
    
    train_img_loss_history = []
    train_txt_loss_history = []
    
    val_img_loss_history = []
    val_txt_loss_history = []
    
    start_time = time.time()
    
    for epoch in range(epochs):
        autoencoder.train()
    
        loss_img_sum = 0.0
        loss_txt_sum = 0.0
        loss_sum = 0.0
        loss_count = 0
    
        for x_img_cur, x_txt_cur in train_loader:
            autoencoder.zero_grad()
            out_img, out_txt = autoencoder(inp_img=x_img_cur, inp_txt=x_txt_cur)
            loss_img = criterion(out_img, x_img_cur)
            loss_txt = criterion(out_txt, x_txt_cur)
            loss = loss_img + loss_txt
        
            loss_img_sum += loss_img
            loss_txt_sum += loss_txt
            loss_sum += loss
            loss_count += 1
        
            loss.backward()
            optimizer.step()
         
        if verbose != 0:
            print(
                'train img loss:', "%.3f" % (loss_img_sum/loss_count).item(), 
                'txt_loss:', "%.3f" % (loss_txt_sum/loss_count).item(), 
                'img + txt loss', "%.3f" % (loss_sum/loss_count).item()
            )
        train_img_loss_history.append((loss_img_sum/loss_count).item())
        train_txt_loss_history.append((loss_txt_sum/loss_count).item())
        
        autoencoder.eval()
    
        val_loss_img_sum = 0.0
        val_loss_txt_sum = 0.0
        val_loss_sum = 0.0
        val_loss_count = 0
    
        with torch.no_grad():
            for x_img_cur, x_txt_cur in val_loader:
                out_img, out_txt = autoencoder(x_img_cur, x_txt_cur)
                loss_img = criterion(out_img, x_img_cur)
                loss_txt = criterion(out_txt, x_txt_cur)
                loss = loss_img + loss_txt
            
                val_loss_img_sum += loss_img
                val_loss_txt_sum += loss_txt
                val_loss_sum += loss
                val_loss_count += 1
        
        if verbose != 0:
            print(
                'val img loss:', "%.3f" % (val_loss_img_sum/val_loss_count).item(), 
                'val txt_loss:', "%.3f" % (val_loss_txt_sum/val_loss_count).item(), 
                'img + txt loss', "%.3f" % (val_loss_sum/val_loss_count).item()
            )
        val_img_loss_history.append((val_loss_img_sum/val_loss_count).item())
        val_txt_loss_history.append((val_loss_txt_sum/val_loss_count).item())
        
    operation_time = time.time() - start_time
    
    if verbose != 0:
        print('autoencoder fitting finished for', operation_time, 'seconds')
        
    return train_img_loss_history, train_txt_loss_history, val_img_loss_history, val_txt_loss_history, operation_time

In [39]:
EPOCHS = 100

In [40]:
def test_trivial_autoencoder(d):
    print('TESTING WITH d = ', d)
    autoencoder = Autoencoder(d=d)
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-3)

    stat = fit_autoencoder(autoencoder, optimizer, EPOCHS, [x_img_train, x_txt_train], [x_img_val, x_txt_val])
    pickle.dump(stat, open( "autoencoder_stat/trivial_" + str(d) + ".pickle", "wb"))

## Width of hidden layer = 8

In [55]:
test_trivial_autoencoder(8)

TESTING WITH d =  8
train img loss: 1.010 txt_loss: 1.006 img + txt loss 2.016
val img loss: 0.245 val txt_loss: 0.245 img + txt loss 0.490
train img loss: 0.918 txt_loss: 0.917 img + txt loss 1.835
val img loss: 0.223 val txt_loss: 0.223 img + txt loss 0.446
train img loss: 0.856 txt_loss: 0.849 img + txt loss 1.706
val img loss: 0.211 val txt_loss: 0.209 img + txt loss 0.421
train img loss: 0.816 txt_loss: 0.806 img + txt loss 1.622
val img loss: 0.203 val txt_loss: 0.201 img + txt loss 0.403
train img loss: 0.786 txt_loss: 0.780 img + txt loss 1.566
val img loss: 0.196 val txt_loss: 0.195 img + txt loss 0.392
train img loss: 0.765 txt_loss: 0.764 img + txt loss 1.529
val img loss: 0.192 val txt_loss: 0.192 img + txt loss 0.384
train img loss: 0.749 txt_loss: 0.754 img + txt loss 1.503
val img loss: 0.188 val txt_loss: 0.190 img + txt loss 0.378
train img loss: 0.737 txt_loss: 0.746 img + txt loss 1.483
val img loss: 0.186 val txt_loss: 0.188 img + txt loss 0.374
train img loss: 0.72

val img loss: 0.174 val txt_loss: 0.178 img + txt loss 0.353
train img loss: 0.687 txt_loss: 0.704 img + txt loss 1.391
val img loss: 0.174 val txt_loss: 0.178 img + txt loss 0.353
train img loss: 0.687 txt_loss: 0.704 img + txt loss 1.391
val img loss: 0.174 val txt_loss: 0.178 img + txt loss 0.353
train img loss: 0.687 txt_loss: 0.704 img + txt loss 1.391
val img loss: 0.174 val txt_loss: 0.178 img + txt loss 0.353
train img loss: 0.687 txt_loss: 0.704 img + txt loss 1.391
val img loss: 0.174 val txt_loss: 0.178 img + txt loss 0.353
train img loss: 0.687 txt_loss: 0.704 img + txt loss 1.391
val img loss: 0.174 val txt_loss: 0.178 img + txt loss 0.353
train img loss: 0.687 txt_loss: 0.704 img + txt loss 1.391
val img loss: 0.174 val txt_loss: 0.178 img + txt loss 0.353
train img loss: 0.687 txt_loss: 0.704 img + txt loss 1.391
val img loss: 0.174 val txt_loss: 0.178 img + txt loss 0.353
train img loss: 0.687 txt_loss: 0.704 img + txt loss 1.391
val img loss: 0.174 val txt_loss: 0.178 

## Width of hidden layer = 16

In [56]:
test_trivial_autoencoder(16)

TESTING WITH d =  16
train img loss: 0.963 txt_loss: 0.960 img + txt loss 1.923
val img loss: 0.222 val txt_loss: 0.221 img + txt loss 0.443
train img loss: 0.820 txt_loss: 0.812 img + txt loss 1.632
val img loss: 0.195 val txt_loss: 0.194 img + txt loss 0.389
train img loss: 0.735 txt_loss: 0.736 img + txt loss 1.471
val img loss: 0.179 val txt_loss: 0.181 img + txt loss 0.360
train img loss: 0.686 txt_loss: 0.699 img + txt loss 1.385
val img loss: 0.169 val txt_loss: 0.174 img + txt loss 0.344
train img loss: 0.655 txt_loss: 0.679 img + txt loss 1.334
val img loss: 0.163 val txt_loss: 0.170 img + txt loss 0.333
train img loss: 0.637 txt_loss: 0.666 img + txt loss 1.303
val img loss: 0.160 val txt_loss: 0.167 img + txt loss 0.327
train img loss: 0.626 txt_loss: 0.657 img + txt loss 1.284
val img loss: 0.158 val txt_loss: 0.166 img + txt loss 0.324
train img loss: 0.620 txt_loss: 0.651 img + txt loss 1.271
val img loss: 0.156 val txt_loss: 0.164 img + txt loss 0.321
train img loss: 0.6

val img loss: 0.147 val txt_loss: 0.155 img + txt loss 0.302
train img loss: 0.580 txt_loss: 0.613 img + txt loss 1.194
val img loss: 0.147 val txt_loss: 0.155 img + txt loss 0.302
train img loss: 0.580 txt_loss: 0.613 img + txt loss 1.194
val img loss: 0.147 val txt_loss: 0.155 img + txt loss 0.302
train img loss: 0.580 txt_loss: 0.613 img + txt loss 1.194
val img loss: 0.147 val txt_loss: 0.155 img + txt loss 0.302
train img loss: 0.580 txt_loss: 0.613 img + txt loss 1.194
val img loss: 0.147 val txt_loss: 0.155 img + txt loss 0.302
train img loss: 0.580 txt_loss: 0.613 img + txt loss 1.193
val img loss: 0.147 val txt_loss: 0.155 img + txt loss 0.302
train img loss: 0.580 txt_loss: 0.613 img + txt loss 1.193
val img loss: 0.147 val txt_loss: 0.155 img + txt loss 0.302
train img loss: 0.580 txt_loss: 0.613 img + txt loss 1.193
val img loss: 0.147 val txt_loss: 0.155 img + txt loss 0.302
train img loss: 0.580 txt_loss: 0.613 img + txt loss 1.193
val img loss: 0.147 val txt_loss: 0.155 

## Width of hidden layer = 32

In [57]:
test_trivial_autoencoder(32)

TESTING WITH d =  32
train img loss: 0.903 txt_loss: 0.890 img + txt loss 1.793
val img loss: 0.198 val txt_loss: 0.193 img + txt loss 0.391
train img loss: 0.711 txt_loss: 0.700 img + txt loss 1.411
val img loss: 0.165 val txt_loss: 0.166 img + txt loss 0.331
train img loss: 0.613 txt_loss: 0.630 img + txt loss 1.243
val img loss: 0.147 val txt_loss: 0.154 img + txt loss 0.301
train img loss: 0.559 txt_loss: 0.593 img + txt loss 1.152
val img loss: 0.137 val txt_loss: 0.147 img + txt loss 0.284
train img loss: 0.529 txt_loss: 0.573 img + txt loss 1.102
val img loss: 0.132 val txt_loss: 0.143 img + txt loss 0.275
train img loss: 0.513 txt_loss: 0.561 img + txt loss 1.074
val img loss: 0.129 val txt_loss: 0.141 img + txt loss 0.270
train img loss: 0.503 txt_loss: 0.553 img + txt loss 1.055
val img loss: 0.126 val txt_loss: 0.139 img + txt loss 0.265
train img loss: 0.495 txt_loss: 0.545 img + txt loss 1.040
val img loss: 0.125 val txt_loss: 0.137 img + txt loss 0.262
train img loss: 0.4

val img loss: 0.115 val txt_loss: 0.129 img + txt loss 0.244
train img loss: 0.456 txt_loss: 0.507 img + txt loss 0.963
val img loss: 0.116 val txt_loss: 0.129 img + txt loss 0.244
train img loss: 0.456 txt_loss: 0.507 img + txt loss 0.963
val img loss: 0.115 val txt_loss: 0.129 img + txt loss 0.244
train img loss: 0.456 txt_loss: 0.507 img + txt loss 0.963
val img loss: 0.115 val txt_loss: 0.129 img + txt loss 0.244
train img loss: 0.456 txt_loss: 0.507 img + txt loss 0.963
val img loss: 0.115 val txt_loss: 0.129 img + txt loss 0.244
train img loss: 0.456 txt_loss: 0.507 img + txt loss 0.963
val img loss: 0.115 val txt_loss: 0.129 img + txt loss 0.244
train img loss: 0.456 txt_loss: 0.507 img + txt loss 0.963
val img loss: 0.115 val txt_loss: 0.129 img + txt loss 0.244
train img loss: 0.456 txt_loss: 0.507 img + txt loss 0.963
val img loss: 0.115 val txt_loss: 0.129 img + txt loss 0.244
train img loss: 0.456 txt_loss: 0.507 img + txt loss 0.963
val img loss: 0.115 val txt_loss: 0.129 

## Width of hidden layer = 64

In [58]:
test_trivial_autoencoder(64)

TESTING WITH d =  64
train img loss: 0.811 txt_loss: 0.802 img + txt loss 1.613
val img loss: 0.164 val txt_loss: 0.164 img + txt loss 0.328
train img loss: 0.568 txt_loss: 0.583 img + txt loss 1.151
val img loss: 0.128 val txt_loss: 0.136 img + txt loss 0.263
train img loss: 0.465 txt_loss: 0.509 img + txt loss 0.974
val img loss: 0.110 val txt_loss: 0.124 img + txt loss 0.233
train img loss: 0.412 txt_loss: 0.475 img + txt loss 0.887
val img loss: 0.100 val txt_loss: 0.118 img + txt loss 0.218
train img loss: 0.385 txt_loss: 0.457 img + txt loss 0.842
val img loss: 0.096 val txt_loss: 0.114 img + txt loss 0.210
train img loss: 0.370 txt_loss: 0.444 img + txt loss 0.815
val img loss: 0.093 val txt_loss: 0.111 img + txt loss 0.204
train img loss: 0.360 txt_loss: 0.434 img + txt loss 0.794
val img loss: 0.090 val txt_loss: 0.109 img + txt loss 0.199
train img loss: 0.351 txt_loss: 0.425 img + txt loss 0.776
val img loss: 0.088 val txt_loss: 0.107 img + txt loss 0.195
train img loss: 0.3

val img loss: 0.080 val txt_loss: 0.097 img + txt loss 0.176
train img loss: 0.314 txt_loss: 0.381 img + txt loss 0.695
val img loss: 0.080 val txt_loss: 0.097 img + txt loss 0.176
train img loss: 0.314 txt_loss: 0.381 img + txt loss 0.695
val img loss: 0.080 val txt_loss: 0.097 img + txt loss 0.177
train img loss: 0.314 txt_loss: 0.381 img + txt loss 0.695
val img loss: 0.080 val txt_loss: 0.097 img + txt loss 0.176
train img loss: 0.314 txt_loss: 0.381 img + txt loss 0.695
val img loss: 0.080 val txt_loss: 0.097 img + txt loss 0.177
train img loss: 0.314 txt_loss: 0.381 img + txt loss 0.695
val img loss: 0.080 val txt_loss: 0.097 img + txt loss 0.176
train img loss: 0.314 txt_loss: 0.381 img + txt loss 0.695
val img loss: 0.080 val txt_loss: 0.097 img + txt loss 0.176
train img loss: 0.314 txt_loss: 0.381 img + txt loss 0.695
val img loss: 0.080 val txt_loss: 0.097 img + txt loss 0.176
train img loss: 0.314 txt_loss: 0.381 img + txt loss 0.695
val img loss: 0.080 val txt_loss: 0.097 

## Width of hidden layer = 128

In [59]:
test_trivial_autoencoder(128)

TESTING WITH d =  128
train img loss: 0.692 txt_loss: 0.717 img + txt loss 1.409
val img loss: 0.125 val txt_loss: 0.136 img + txt loss 0.261
train img loss: 0.407 txt_loss: 0.462 img + txt loss 0.869
val img loss: 0.086 val txt_loss: 0.103 img + txt loss 0.189
train img loss: 0.302 txt_loss: 0.376 img + txt loss 0.678
val img loss: 0.069 val txt_loss: 0.089 img + txt loss 0.158
train img loss: 0.254 txt_loss: 0.338 img + txt loss 0.592
val img loss: 0.061 val txt_loss: 0.083 img + txt loss 0.144
train img loss: 0.230 txt_loss: 0.317 img + txt loss 0.546
val img loss: 0.056 val txt_loss: 0.078 img + txt loss 0.135
train img loss: 0.215 txt_loss: 0.302 img + txt loss 0.516
val img loss: 0.053 val txt_loss: 0.075 img + txt loss 0.128
train img loss: 0.204 txt_loss: 0.289 img + txt loss 0.493
val img loss: 0.051 val txt_loss: 0.072 img + txt loss 0.123
train img loss: 0.196 txt_loss: 0.278 img + txt loss 0.474
val img loss: 0.049 val txt_loss: 0.069 img + txt loss 0.119
train img loss: 0.

val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.162 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.162 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.162 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.162 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.162 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.162 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.162 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 

## Width of hidden layer = 256

In [60]:
test_trivial_autoencoder(256)

TESTING WITH d =  256
train img loss: 0.563 txt_loss: 0.616 img + txt loss 1.180
val img loss: 0.087 val txt_loss: 0.103 img + txt loss 0.190
train img loss: 0.266 txt_loss: 0.324 img + txt loss 0.589
val img loss: 0.054 val txt_loss: 0.067 img + txt loss 0.121
train img loss: 0.187 txt_loss: 0.228 img + txt loss 0.415
val img loss: 0.042 val txt_loss: 0.051 img + txt loss 0.093
train img loss: 0.153 txt_loss: 0.180 img + txt loss 0.333
val img loss: 0.036 val txt_loss: 0.042 img + txt loss 0.078
train img loss: 0.134 txt_loss: 0.151 img + txt loss 0.284
val img loss: 0.032 val txt_loss: 0.036 img + txt loss 0.068
train img loss: 0.121 txt_loss: 0.131 img + txt loss 0.252
val img loss: 0.030 val txt_loss: 0.031 img + txt loss 0.061
train img loss: 0.111 txt_loss: 0.116 img + txt loss 0.228
val img loss: 0.027 val txt_loss: 0.028 img + txt loss 0.056
train img loss: 0.104 txt_loss: 0.105 img + txt loss 0.208
val img loss: 0.026 val txt_loss: 0.025 img + txt loss 0.051
train img loss: 0.

val img loss: 0.018 val txt_loss: 0.010 img + txt loss 0.029
train img loss: 0.071 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.018 val txt_loss: 0.010 img + txt loss 0.029
train img loss: 0.071 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.018 val txt_loss: 0.010 img + txt loss 0.029
train img loss: 0.072 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.018 val txt_loss: 0.010 img + txt loss 0.029
train img loss: 0.071 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.018 val txt_loss: 0.010 img + txt loss 0.029
train img loss: 0.072 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.018 val txt_loss: 0.010 img + txt loss 0.029
train img loss: 0.071 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.018 val txt_loss: 0.010 img + txt loss 0.029
train img loss: 0.072 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.019 val txt_loss: 0.010 img + txt loss 0.029
train img loss: 0.071 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.018 val txt_loss: 0.010 

## Width of hidden layer = 512

In [61]:
test_trivial_autoencoder(512)

TESTING WITH d =  512
train img loss: 0.454 txt_loss: 0.508 img + txt loss 0.962
val img loss: 0.059 val txt_loss: 0.069 img + txt loss 0.127
train img loss: 0.180 txt_loss: 0.189 img + txt loss 0.369
val img loss: 0.037 val txt_loss: 0.034 img + txt loss 0.072
train img loss: 0.131 txt_loss: 0.107 img + txt loss 0.238
val img loss: 0.030 val txt_loss: 0.023 img + txt loss 0.053
train img loss: 0.108 txt_loss: 0.078 img + txt loss 0.186
val img loss: 0.026 val txt_loss: 0.018 img + txt loss 0.044
train img loss: 0.093 txt_loss: 0.064 img + txt loss 0.157
val img loss: 0.022 val txt_loss: 0.015 img + txt loss 0.038
train img loss: 0.082 txt_loss: 0.055 img + txt loss 0.136
val img loss: 0.020 val txt_loss: 0.013 img + txt loss 0.033
train img loss: 0.073 txt_loss: 0.046 img + txt loss 0.119
val img loss: 0.018 val txt_loss: 0.011 img + txt loss 0.029
train img loss: 0.065 txt_loss: 0.038 img + txt loss 0.104
val img loss: 0.016 val txt_loss: 0.009 img + txt loss 0.025
train img loss: 0.

val img loss: 0.008 val txt_loss: 0.000 img + txt loss 0.009
train img loss: 0.032 txt_loss: 0.001 img + txt loss 0.033
val img loss: 0.008 val txt_loss: 0.000 img + txt loss 0.009
train img loss: 0.031 txt_loss: 0.001 img + txt loss 0.033
val img loss: 0.008 val txt_loss: 0.001 img + txt loss 0.009
train img loss: 0.032 txt_loss: 0.001 img + txt loss 0.033
val img loss: 0.009 val txt_loss: 0.000 img + txt loss 0.009
train img loss: 0.031 txt_loss: 0.001 img + txt loss 0.033
val img loss: 0.008 val txt_loss: 0.000 img + txt loss 0.009
train img loss: 0.032 txt_loss: 0.001 img + txt loss 0.033
val img loss: 0.008 val txt_loss: 0.000 img + txt loss 0.009
train img loss: 0.032 txt_loss: 0.001 img + txt loss 0.033
val img loss: 0.008 val txt_loss: 0.000 img + txt loss 0.009
train img loss: 0.031 txt_loss: 0.001 img + txt loss 0.033
val img loss: 0.008 val txt_loss: 0.001 img + txt loss 0.009
train img loss: 0.031 txt_loss: 0.001 img + txt loss 0.033
val img loss: 0.009 val txt_loss: 0.000 

In [13]:
encoder = autoencoder.encoder

In [17]:
x_img_train_e = torch.tensor(x_img_train[:2600]).float()
x_txt_train_e = torch.tensor(x_txt_train[:2600]).float()

out = encoder(x_img_train_e, x_txt_train_e)

## Using encoder outputs as model inputs

In [None]:
autoencoder_128 = Autoencoder(d=128)
optimizer_128 = torch.optim.Adam(autoencoder_128.parameters(), lr=1e-3)

stat = fit_autoencoder(autoencoder_128, optimizer_128, 100, [x_img_train, x_txt_train], [x_img_val, x_txt_val])

In [22]:
class AfterEncoderModel(nn.Module):
    def __init__(self, encoder, d=128, drop=0.5):
        super().__init__()
        
        self.encoder = encoder
            
        self.fc1 = nn.Linear(d * 2, d)
        self.fc2 = nn.Linear(d, d)
        self.out = nn.Linear(d, N_CLASSES)

        self.dropout = nn.modules.Dropout(p=drop)

    def forward(self, inp_img, inp_txt):
        x = F.relu(self.encoder(inp_img, inp_txt))
        x = self.dropout(x)
        
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        
        x = F.relu(self.fc2(x))
        x = F.log_softmax(self.out(x), dim=1)
        return x

In [23]:
import pytorch.torch_models as torch_models

encoder_128 = autoencoder_128.encoder
after_encoder_model_128 = AfterEncoderModel(encoder_128, d=128, drop=0.5)
optimizer_aem_128 = torch.optim.Adam(after_encoder_model_128.parameters(), lr=1e-3)
writer = SummaryWriter('runs/aem_128_bs2048_rs42_d128_wd0005_drop05_100')

torch_models.fit_topics_model(
    model=after_encoder_model_128,
    optimizer=optimizer_aem_128,
    epochs=100,
    writer=writer,
    train_loader=train_loader,
    val_loader=val_loader
)

epoch: 0 train_loss: tensor(2.2405, grad_fn=<NllLossBackward>) average train loss tensor(2.8921, grad_fn=<DivBackward0>)
val_acc: 0.4850331125827815 val_avg_loss: tensor(1.9243)
epoch: 1 train_loss: tensor(1.9405, grad_fn=<NllLossBackward>) average train loss tensor(2.0796, grad_fn=<DivBackward0>)
val_acc: 0.5497277409860192 val_avg_loss: tensor(1.6806)
epoch: 2 train_loss: tensor(1.8638, grad_fn=<NllLossBackward>) average train loss tensor(1.9260, grad_fn=<DivBackward0>)
val_acc: 0.5720971302428256 val_avg_loss: tensor(1.5968)
epoch: 3 train_loss: tensor(1.7742, grad_fn=<NllLossBackward>) average train loss tensor(1.8515, grad_fn=<DivBackward0>)
val_acc: 0.582075055187638 val_avg_loss: tensor(1.5543)
epoch: 4 train_loss: tensor(1.7440, grad_fn=<NllLossBackward>) average train loss tensor(1.8061, grad_fn=<DivBackward0>)
val_acc: 0.5884915378955115 val_avg_loss: tensor(1.5244)
epoch: 5 train_loss: tensor(1.6865, grad_fn=<NllLossBackward>) average train loss tensor(1.7758, grad_fn=<DivBa

epoch: 46 train_loss: tensor(1.3365, grad_fn=<NllLossBackward>) average train loss tensor(1.4964, grad_fn=<DivBackward0>)
val_acc: 0.6242531272994849 val_avg_loss: tensor(1.3678)
epoch: 47 train_loss: tensor(1.3226, grad_fn=<NllLossBackward>) average train loss tensor(1.4934, grad_fn=<DivBackward0>)
val_acc: 0.6233701250919794 val_avg_loss: tensor(1.3671)
epoch: 48 train_loss: tensor(1.3359, grad_fn=<NllLossBackward>) average train loss tensor(1.4897, grad_fn=<DivBackward0>)
val_acc: 0.6250772626931568 val_avg_loss: tensor(1.3647)
epoch: 49 train_loss: tensor(1.2944, grad_fn=<NllLossBackward>) average train loss tensor(1.4857, grad_fn=<DivBackward0>)
val_acc: 0.6239882266372333 val_avg_loss: tensor(1.3655)
epoch: 50 train_loss: tensor(1.2921, grad_fn=<NllLossBackward>) average train loss tensor(1.4831, grad_fn=<DivBackward0>)
val_acc: 0.6243414275202355 val_avg_loss: tensor(1.3649)
epoch: 51 train_loss: tensor(1.3153, grad_fn=<NllLossBackward>) average train loss tensor(1.4845, grad_fn

epoch: 92 train_loss: tensor(1.1780, grad_fn=<NllLossBackward>) average train loss tensor(1.4019, grad_fn=<DivBackward0>)
val_acc: 0.6269021339220014 val_avg_loss: tensor(1.3467)
epoch: 93 train_loss: tensor(1.1671, grad_fn=<NllLossBackward>) average train loss tensor(1.4023, grad_fn=<DivBackward0>)
val_acc: 0.6268138337012509 val_avg_loss: tensor(1.3463)
epoch: 94 train_loss: tensor(1.1405, grad_fn=<NllLossBackward>) average train loss tensor(1.4006, grad_fn=<DivBackward0>)
val_acc: 0.6257542310522443 val_avg_loss: tensor(1.3477)
epoch: 95 train_loss: tensor(1.1792, grad_fn=<NllLossBackward>) average train loss tensor(1.4015, grad_fn=<DivBackward0>)
val_acc: 0.6271081677704194 val_avg_loss: tensor(1.3458)
epoch: 96 train_loss: tensor(1.1610, grad_fn=<NllLossBackward>) average train loss tensor(1.3963, grad_fn=<DivBackward0>)
val_acc: 0.626990434142752 val_avg_loss: tensor(1.3465)
epoch: 97 train_loss: tensor(1.1960, grad_fn=<NllLossBackward>) average train loss tensor(1.3997, grad_fn=

### Now train autoencoder on all train samples and model only on 2000 train samples

In [26]:
autoencoder_128_2 = Autoencoder(d=128)
optimizer_128_2 = torch.optim.Adam(autoencoder_128_2.parameters(), lr=1e-3)

stat = fit_autoencoder(autoencoder_128_2, optimizer_128_2, 100, [x_img_train, x_txt_train], [x_img_val, x_txt_val])

train img loss: 0.693 txt_loss: 0.719 img + txt loss 1.412
val img loss: 0.125 val txt_loss: 0.136 img + txt loss 0.261
train img loss: 0.408 txt_loss: 0.463 img + txt loss 0.871
val img loss: 0.086 val txt_loss: 0.103 img + txt loss 0.190
train img loss: 0.303 txt_loss: 0.376 img + txt loss 0.679
val img loss: 0.069 val txt_loss: 0.089 img + txt loss 0.159
train img loss: 0.254 txt_loss: 0.338 img + txt loss 0.592
val img loss: 0.061 val txt_loss: 0.083 img + txt loss 0.144
train img loss: 0.229 txt_loss: 0.317 img + txt loss 0.546
val img loss: 0.056 val txt_loss: 0.078 img + txt loss 0.134
train img loss: 0.213 txt_loss: 0.302 img + txt loss 0.516
val img loss: 0.053 val txt_loss: 0.075 img + txt loss 0.128
train img loss: 0.203 txt_loss: 0.290 img + txt loss 0.493
val img loss: 0.050 val txt_loss: 0.072 img + txt loss 0.123
train img loss: 0.195 txt_loss: 0.280 img + txt loss 0.475
val img loss: 0.049 val txt_loss: 0.070 img + txt loss 0.119
train img loss: 0.189 txt_loss: 0.270 im

val img loss: 0.042 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.042 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 

In [46]:
autoencoder_f_128 = AutoencoderFixed(d=128)
optimizer_f_128 = torch.optim.Adam(autoencoder_f_128.parameters(), lr=1e-3)

stat = fit_autoencoder(autoencoder_f_128, optimizer_f_128, 50, [x_img_train, x_txt_train], [x_img_val, x_txt_val])

train img loss: 0.608 txt_loss: 0.627 img + txt loss 1.236
val img loss: 0.400 val txt_loss: 0.438 img + txt loss 0.839
train img loss: 0.326 txt_loss: 0.366 img + txt loss 0.692
val img loss: 0.271 val txt_loss: 0.312 img + txt loss 0.582
train img loss: 0.242 txt_loss: 0.284 img + txt loss 0.526
val img loss: 0.218 val txt_loss: 0.262 img + txt loss 0.481
train img loss: 0.205 txt_loss: 0.251 img + txt loss 0.456
val img loss: 0.193 val txt_loss: 0.243 img + txt loss 0.436
train img loss: 0.186 txt_loss: 0.239 img + txt loss 0.424
val img loss: 0.179 val txt_loss: 0.235 img + txt loss 0.414
train img loss: 0.175 txt_loss: 0.233 img + txt loss 0.408
val img loss: 0.172 val txt_loss: 0.231 img + txt loss 0.403
train img loss: 0.169 txt_loss: 0.230 img + txt loss 0.399
val img loss: 0.167 val txt_loss: 0.229 img + txt loss 0.396
train img loss: 0.166 txt_loss: 0.228 img + txt loss 0.395
val img loss: 0.165 val txt_loss: 0.228 img + txt loss 0.393
train img loss: 0.165 txt_loss: 0.227 im

In [29]:
training_indices = np.random.randint(low=0, high=x_img_train.shape[0], size=2000)

x_img_train_2000 = x_img_train[training_indices]
x_txt_train_2000 = x_txt_train[training_indices]
y_train_2000 = y_train[training_indices]

x_img_train_t_2000 = torch.tensor(x_img_train_2000).float()
x_txt_train_t_2000 = torch.tensor(x_txt_train_2000).float()
y_train_t_2000 = torch.tensor(y_train_2000).float()

train_ds_2000 = TensorDataset(x_img_train_t_2000, x_txt_train_t_2000, y_train_t_2000)
train_loader_2000 = DataLoader(train_ds_2000, batch_size=512)

In [31]:
encoder_128_2 = autoencoder_128_2.encoder
after_encoder_model_128_2 = AfterEncoderModel(encoder_128_2, d=128, drop=0.5)
optimizer_aem_128_2 = torch.optim.Adam(after_encoder_model_128_2.parameters(), lr=1e-3)
writer = SummaryWriter('runs/aem_128_2_bs2048_rs42_d128_wd0005_drop05_100')

torch_models.fit_topics_model(
    model=after_encoder_model_128_2,
    optimizer=optimizer_aem_128_2,
    epochs=100,
    writer=writer,
    train_loader=train_loader_2000,
    val_loader=val_loader
)

epoch: 0 train_loss: tensor(3.8379, grad_fn=<NllLossBackward>) average train loss tensor(3.9031, grad_fn=<DivBackward0>)
val_acc: 0.09271523178807947 val_avg_loss: tensor(3.8059)
epoch: 1 train_loss: tensor(3.6929, grad_fn=<NllLossBackward>) average train loss tensor(3.7439, grad_fn=<DivBackward0>)
val_acc: 0.09398086828550405 val_avg_loss: tensor(3.6885)
epoch: 2 train_loss: tensor(3.6383, grad_fn=<NllLossBackward>) average train loss tensor(3.6451, grad_fn=<DivBackward0>)
val_acc: 0.12038263428991906 val_avg_loss: tensor(3.5805)
epoch: 3 train_loss: tensor(3.4799, grad_fn=<NllLossBackward>) average train loss tensor(3.5479, grad_fn=<DivBackward0>)
val_acc: 0.19031640912435616 val_avg_loss: tensor(3.4694)
epoch: 4 train_loss: tensor(3.3557, grad_fn=<NllLossBackward>) average train loss tensor(3.4323, grad_fn=<DivBackward0>)
val_acc: 0.23370125091979396 val_avg_loss: tensor(3.3411)
epoch: 5 train_loss: tensor(3.2332, grad_fn=<NllLossBackward>) average train loss tensor(3.3046, grad_fn=

val_acc: 0.49518763796909493 val_avg_loss: tensor(2.0173)
epoch: 47 train_loss: tensor(1.1300, grad_fn=<NllLossBackward>) average train loss tensor(1.1636, grad_fn=<DivBackward0>)
val_acc: 0.4969830757910228 val_avg_loss: tensor(2.0283)
epoch: 48 train_loss: tensor(0.9853, grad_fn=<NllLossBackward>) average train loss tensor(1.1528, grad_fn=<DivBackward0>)
val_acc: 0.496747608535688 val_avg_loss: tensor(2.0398)
epoch: 49 train_loss: tensor(1.0180, grad_fn=<NllLossBackward>) average train loss tensor(1.1201, grad_fn=<DivBackward0>)
val_acc: 0.4926269315673289 val_avg_loss: tensor(2.0592)
epoch: 50 train_loss: tensor(0.9955, grad_fn=<NllLossBackward>) average train loss tensor(1.0994, grad_fn=<DivBackward0>)
val_acc: 0.49439293598233996 val_avg_loss: tensor(2.0670)
epoch: 51 train_loss: tensor(1.0541, grad_fn=<NllLossBackward>) average train loss tensor(1.1100, grad_fn=<DivBackward0>)
val_acc: 0.49303899926416483 val_avg_loss: tensor(2.0846)
epoch: 52 train_loss: tensor(0.9991, grad_fn=<

val_acc: 0.4850036791758646 val_avg_loss: tensor(2.4524)
epoch: 93 train_loss: tensor(0.6659, grad_fn=<NllLossBackward>) average train loss tensor(0.6899, grad_fn=<DivBackward0>)
val_acc: 0.48391464311994115 val_avg_loss: tensor(2.4765)
epoch: 94 train_loss: tensor(0.5933, grad_fn=<NllLossBackward>) average train loss tensor(0.6655, grad_fn=<DivBackward0>)
val_acc: 0.4886534216335541 val_avg_loss: tensor(2.4909)
epoch: 95 train_loss: tensor(0.6520, grad_fn=<NllLossBackward>) average train loss tensor(0.6553, grad_fn=<DivBackward0>)
val_acc: 0.48729948491537894 val_avg_loss: tensor(2.4961)
epoch: 96 train_loss: tensor(0.5678, grad_fn=<NllLossBackward>) average train loss tensor(0.6475, grad_fn=<DivBackward0>)
val_acc: 0.48659308314937455 val_avg_loss: tensor(2.5003)
epoch: 97 train_loss: tensor(0.4935, grad_fn=<NllLossBackward>) average train loss tensor(0.6248, grad_fn=<DivBackward0>)
val_acc: 0.48921265636497424 val_avg_loss: tensor(2.5251)
epoch: 98 train_loss: tensor(0.5798, grad_fn

In [47]:
import pytorch.torch_models as torch_models

encoder_f_128 = autoencoder_f_128.encoder
after_encoder_model_f_128 = AfterEncoderModel(encoder_f_128, d=128, drop=0.5)
optimizer_aem_f_128 = torch.optim.Adam(after_encoder_model_f_128.parameters(), lr=1e-3)
writer = SummaryWriter('runs/aem_f_128_bs2048_rs42_d128_drop05_t2000_100')

torch_models.fit_topics_model(
    model=after_encoder_model_f_128,
    optimizer=optimizer_aem_f_128,
    epochs=100,
    writer=writer,
    train_loader=train_loader_2000,
    val_loader=val_loader
)

epoch: 0 train_loss: tensor(3.8616, grad_fn=<NllLossBackward>) average train loss tensor(3.8841, grad_fn=<DivBackward0>)
val_acc: 0.09091979396615159 val_avg_loss: tensor(3.8390)
epoch: 1 train_loss: tensor(3.7379, grad_fn=<NllLossBackward>) average train loss tensor(3.7809, grad_fn=<DivBackward0>)
val_acc: 0.11599705665930832 val_avg_loss: tensor(3.7186)
epoch: 2 train_loss: tensor(3.5536, grad_fn=<NllLossBackward>) average train loss tensor(3.6227, grad_fn=<DivBackward0>)
val_acc: 0.11646799116997793 val_avg_loss: tensor(3.5462)
epoch: 3 train_loss: tensor(3.3934, grad_fn=<NllLossBackward>) average train loss tensor(3.4654, grad_fn=<DivBackward0>)
val_acc: 0.1914054451802796 val_avg_loss: tensor(3.3924)
epoch: 4 train_loss: tensor(3.1693, grad_fn=<NllLossBackward>) average train loss tensor(3.2956, grad_fn=<DivBackward0>)
val_acc: 0.2513907284768212 val_avg_loss: tensor(3.1974)
epoch: 5 train_loss: tensor(2.9944, grad_fn=<NllLossBackward>) average train loss tensor(3.1018, grad_fn=<D

val_acc: 0.47172921265636497 val_avg_loss: tensor(2.9627)
epoch: 47 train_loss: tensor(0.4260, grad_fn=<NllLossBackward>) average train loss tensor(0.4747, grad_fn=<DivBackward0>)
val_acc: 0.4714054451802796 val_avg_loss: tensor(2.9927)
epoch: 48 train_loss: tensor(0.4707, grad_fn=<NllLossBackward>) average train loss tensor(0.5097, grad_fn=<DivBackward0>)
val_acc: 0.4706401766004415 val_avg_loss: tensor(3.0378)
epoch: 49 train_loss: tensor(0.3891, grad_fn=<NllLossBackward>) average train loss tensor(0.4383, grad_fn=<DivBackward0>)
val_acc: 0.4695805739514349 val_avg_loss: tensor(3.0831)
epoch: 50 train_loss: tensor(0.3751, grad_fn=<NllLossBackward>) average train loss tensor(0.4539, grad_fn=<DivBackward0>)
val_acc: 0.469168506254599 val_avg_loss: tensor(3.1051)
epoch: 51 train_loss: tensor(0.3832, grad_fn=<NllLossBackward>) average train loss tensor(0.4504, grad_fn=<DivBackward0>)
val_acc: 0.4649889624724062 val_avg_loss: tensor(3.1555)
epoch: 52 train_loss: tensor(0.3623, grad_fn=<Nl

val_acc: 0.45818984547461367 val_avg_loss: tensor(4.3374)
epoch: 93 train_loss: tensor(0.1635, grad_fn=<NllLossBackward>) average train loss tensor(0.1892, grad_fn=<DivBackward0>)
val_acc: 0.4563355408388521 val_avg_loss: tensor(4.3592)
epoch: 94 train_loss: tensor(0.1512, grad_fn=<NllLossBackward>) average train loss tensor(0.1784, grad_fn=<DivBackward0>)
val_acc: 0.4570125091979397 val_avg_loss: tensor(4.4123)
epoch: 95 train_loss: tensor(0.1511, grad_fn=<NllLossBackward>) average train loss tensor(0.1689, grad_fn=<DivBackward0>)
val_acc: 0.4582192788815305 val_avg_loss: tensor(4.4261)
epoch: 96 train_loss: tensor(0.1735, grad_fn=<NllLossBackward>) average train loss tensor(0.1824, grad_fn=<DivBackward0>)
val_acc: 0.4594554819720383 val_avg_loss: tensor(4.4187)
epoch: 97 train_loss: tensor(0.0954, grad_fn=<NllLossBackward>) average train loss tensor(0.1540, grad_fn=<DivBackward0>)
val_acc: 0.45933774834437086 val_avg_loss: tensor(4.4270)
epoch: 98 train_loss: tensor(0.1460, grad_fn=<

In [49]:
autoencoder_relu_128 = AutoencoderRelu(d=128)
optimizer_relu_128 = torch.optim.Adam(autoencoder_relu_128.parameters(), lr=1e-3)

stat = fit_autoencoder(autoencoder_relu_128, optimizer_relu_128, 50, [x_img_train, x_txt_train], [x_img_val, x_txt_val])

train img loss: 0.700 txt_loss: 0.731 img + txt loss 1.431
val img loss: 0.519 val txt_loss: 0.567 img + txt loss 1.086
train img loss: 0.458 txt_loss: 0.510 img + txt loss 0.968
val img loss: 0.413 val txt_loss: 0.467 img + txt loss 0.880
train img loss: 0.386 txt_loss: 0.441 img + txt loss 0.826
val img loss: 0.363 val txt_loss: 0.419 img + txt loss 0.782
train img loss: 0.347 txt_loss: 0.402 img + txt loss 0.749
val img loss: 0.332 val txt_loss: 0.389 img + txt loss 0.721
train img loss: 0.321 txt_loss: 0.376 img + txt loss 0.697
val img loss: 0.311 val txt_loss: 0.367 img + txt loss 0.678
train img loss: 0.301 txt_loss: 0.356 img + txt loss 0.656
val img loss: 0.294 val txt_loss: 0.348 img + txt loss 0.642
train img loss: 0.286 txt_loss: 0.339 img + txt loss 0.625
val img loss: 0.280 val txt_loss: 0.332 img + txt loss 0.612
train img loss: 0.273 txt_loss: 0.324 img + txt loss 0.597
val img loss: 0.268 val txt_loss: 0.318 img + txt loss 0.585
train img loss: 0.262 txt_loss: 0.311 im

In [50]:
import pytorch.torch_models as torch_models

encoder_relu_128 = autoencoder_relu_128.encoder
after_encoder_model_relu_128 = AfterEncoderModel(encoder_relu_128, d=128, drop=0.5)
optimizer_aem_relu_128 = torch.optim.Adam(after_encoder_model_relu_128.parameters(), lr=1e-3)
writer = SummaryWriter('runs/aem_relu_128_bs2048_rs42_d128_drop05_t2000_100')

torch_models.fit_topics_model(
    model=after_encoder_model_relu_128,
    optimizer=optimizer_aem_relu_128,
    epochs=100,
    writer=writer,
    train_loader=train_loader_2000,
    val_loader=val_loader
)

epoch: 0 train_loss: tensor(3.8111, grad_fn=<NllLossBackward>) average train loss tensor(3.8890, grad_fn=<DivBackward0>)
val_acc: 0.051979396615158206 val_avg_loss: tensor(3.7849)
epoch: 1 train_loss: tensor(3.6866, grad_fn=<NllLossBackward>) average train loss tensor(3.7291, grad_fn=<DivBackward0>)
val_acc: 0.08264900662251656 val_avg_loss: tensor(3.6674)
epoch: 2 train_loss: tensor(3.5885, grad_fn=<NllLossBackward>) average train loss tensor(3.6417, grad_fn=<DivBackward0>)
val_acc: 0.12868285504047094 val_avg_loss: tensor(3.5684)
epoch: 3 train_loss: tensor(3.4793, grad_fn=<NllLossBackward>) average train loss tensor(3.5419, grad_fn=<DivBackward0>)
val_acc: 0.18908020603384842 val_avg_loss: tensor(3.4693)
epoch: 4 train_loss: tensor(3.3642, grad_fn=<NllLossBackward>) average train loss tensor(3.4289, grad_fn=<DivBackward0>)
val_acc: 0.20753495217071377 val_avg_loss: tensor(3.3417)
epoch: 5 train_loss: tensor(3.2833, grad_fn=<NllLossBackward>) average train loss tensor(3.3199, grad_fn

val_acc: 0.4580132450331126 val_avg_loss: tensor(2.8287)
epoch: 47 train_loss: tensor(0.5273, grad_fn=<NllLossBackward>) average train loss tensor(0.5625, grad_fn=<DivBackward0>)
val_acc: 0.4603090507726269 val_avg_loss: tensor(2.8947)
epoch: 48 train_loss: tensor(0.4474, grad_fn=<NllLossBackward>) average train loss tensor(0.5210, grad_fn=<DivBackward0>)
val_acc: 0.45863134657836646 val_avg_loss: tensor(2.9432)
epoch: 49 train_loss: tensor(0.4732, grad_fn=<NllLossBackward>) average train loss tensor(0.5406, grad_fn=<DivBackward0>)
val_acc: 0.45810154525386315 val_avg_loss: tensor(2.9674)
epoch: 50 train_loss: tensor(0.4247, grad_fn=<NllLossBackward>) average train loss tensor(0.4920, grad_fn=<DivBackward0>)
val_acc: 0.4596615158204562 val_avg_loss: tensor(2.9767)
epoch: 51 train_loss: tensor(0.4152, grad_fn=<NllLossBackward>) average train loss tensor(0.5003, grad_fn=<DivBackward0>)
val_acc: 0.4588373804267844 val_avg_loss: tensor(2.9907)
epoch: 52 train_loss: tensor(0.4292, grad_fn=<

val_acc: 0.44618101545253863 val_avg_loss: tensor(4.4217)
epoch: 93 train_loss: tensor(0.1161, grad_fn=<NllLossBackward>) average train loss tensor(0.1410, grad_fn=<DivBackward0>)
val_acc: 0.4437674760853569 val_avg_loss: tensor(4.4616)
epoch: 94 train_loss: tensor(0.1345, grad_fn=<NllLossBackward>) average train loss tensor(0.1331, grad_fn=<DivBackward0>)
val_acc: 0.44311994113318615 val_avg_loss: tensor(4.4856)
epoch: 95 train_loss: tensor(0.0832, grad_fn=<NllLossBackward>) average train loss tensor(0.1215, grad_fn=<DivBackward0>)
val_acc: 0.4447682119205298 val_avg_loss: tensor(4.5071)
epoch: 96 train_loss: tensor(0.1180, grad_fn=<NllLossBackward>) average train loss tensor(0.1329, grad_fn=<DivBackward0>)
val_acc: 0.447093451066961 val_avg_loss: tensor(4.5562)
epoch: 97 train_loss: tensor(0.1356, grad_fn=<NllLossBackward>) average train loss tensor(0.1412, grad_fn=<DivBackward0>)
val_acc: 0.4472111846946284 val_avg_loss: tensor(4.6275)
epoch: 98 train_loss: tensor(0.1209, grad_fn=<N

In [52]:
autoencoder_concat_128 = AutoencoderConcat(d=128)
optimizer_concat_128 = torch.optim.Adam(autoencoder_concat_128.parameters(), lr=1e-3)

stat = fit_autoencoder(autoencoder_concat_128, optimizer_concat_128, 50, [x_img_train, x_txt_train], [x_img_val, x_txt_val])

train img loss: 0.624 txt_loss: 0.637 img + txt loss 1.261
val img loss: 0.393 val txt_loss: 0.417 img + txt loss 0.810
train img loss: 0.308 txt_loss: 0.339 img + txt loss 0.648
val img loss: 0.246 val txt_loss: 0.286 img + txt loss 0.532
train img loss: 0.215 txt_loss: 0.266 img + txt loss 0.481
val img loss: 0.192 val txt_loss: 0.252 img + txt loss 0.444
train img loss: 0.181 txt_loss: 0.246 img + txt loss 0.427
val img loss: 0.172 val txt_loss: 0.240 img + txt loss 0.412
train img loss: 0.168 txt_loss: 0.237 img + txt loss 0.406
val img loss: 0.165 val txt_loss: 0.234 img + txt loss 0.400
train img loss: 0.164 txt_loss: 0.233 img + txt loss 0.397
val img loss: 0.163 val txt_loss: 0.231 img + txt loss 0.394
train img loss: 0.163 txt_loss: 0.230 img + txt loss 0.393
val img loss: 0.162 val txt_loss: 0.229 img + txt loss 0.391
train img loss: 0.162 txt_loss: 0.229 img + txt loss 0.391
val img loss: 0.162 val txt_loss: 0.228 img + txt loss 0.390
train img loss: 0.162 txt_loss: 0.228 im

In [53]:
encoder_concat_128 = autoencoder_concat_128.encoder
after_encoder_model_concat_128 = AfterEncoderModel(encoder_concat_128, d=128, drop=0.5)
optimizer_aem_concat_128 = torch.optim.Adam(after_encoder_model_concat_128.parameters(), lr=1e-3)
writer = SummaryWriter('runs/aem_concat_128_bs2048_rs42_d128_drop05_t2000_100')

torch_models.fit_topics_model(
    model=after_encoder_model_relu_128,
    optimizer=optimizer_aem_relu_128,
    epochs=100,
    writer=writer,
    train_loader=train_loader_2000,
    val_loader=val_loader
)

epoch: 0 train_loss: tensor(0.1310, grad_fn=<NllLossBackward>) average train loss tensor(0.1469, grad_fn=<DivBackward0>)
val_acc: 0.4429139072847682 val_avg_loss: tensor(4.6798)
epoch: 1 train_loss: tensor(0.0908, grad_fn=<NllLossBackward>) average train loss tensor(0.1235, grad_fn=<DivBackward0>)
val_acc: 0.4416188373804268 val_avg_loss: tensor(4.6694)
epoch: 2 train_loss: tensor(0.1022, grad_fn=<NllLossBackward>) average train loss tensor(0.1271, grad_fn=<DivBackward0>)
val_acc: 0.44235467255334804 val_avg_loss: tensor(4.6251)
epoch: 3 train_loss: tensor(0.1136, grad_fn=<NllLossBackward>) average train loss tensor(0.1184, grad_fn=<DivBackward0>)
val_acc: 0.4440029433406917 val_avg_loss: tensor(4.5841)
epoch: 4 train_loss: tensor(0.0880, grad_fn=<NllLossBackward>) average train loss tensor(0.1080, grad_fn=<DivBackward0>)
val_acc: 0.44332597498160414 val_avg_loss: tensor(4.6219)
epoch: 5 train_loss: tensor(0.0769, grad_fn=<NllLossBackward>) average train loss tensor(0.1151, grad_fn=<Di

val_acc: 0.44409124356144225 val_avg_loss: tensor(5.5885)
epoch: 47 train_loss: tensor(0.0721, grad_fn=<NllLossBackward>) average train loss tensor(0.0643, grad_fn=<DivBackward0>)
val_acc: 0.44479764532744664 val_avg_loss: tensor(5.6368)
epoch: 48 train_loss: tensor(0.0868, grad_fn=<NllLossBackward>) average train loss tensor(0.0774, grad_fn=<DivBackward0>)
val_acc: 0.4455923473142016 val_avg_loss: tensor(5.6604)
epoch: 49 train_loss: tensor(0.0491, grad_fn=<NllLossBackward>) average train loss tensor(0.0578, grad_fn=<DivBackward0>)
val_acc: 0.4434437086092715 val_avg_loss: tensor(5.6674)
epoch: 50 train_loss: tensor(0.0675, grad_fn=<NllLossBackward>) average train loss tensor(0.0749, grad_fn=<DivBackward0>)
val_acc: 0.4411479028697572 val_avg_loss: tensor(5.6427)
epoch: 51 train_loss: tensor(0.0581, grad_fn=<NllLossBackward>) average train loss tensor(0.0733, grad_fn=<DivBackward0>)
val_acc: 0.44379690949227374 val_avg_loss: tensor(5.5728)
epoch: 52 train_loss: tensor(0.0459, grad_fn=

val_acc: 0.4392935982339956 val_avg_loss: tensor(5.9056)
epoch: 93 train_loss: tensor(0.0328, grad_fn=<NllLossBackward>) average train loss tensor(0.0426, grad_fn=<DivBackward0>)
val_acc: 0.43858719646799116 val_avg_loss: tensor(5.9308)
epoch: 94 train_loss: tensor(0.0430, grad_fn=<NllLossBackward>) average train loss tensor(0.0465, grad_fn=<DivBackward0>)
val_acc: 0.4422663723325975 val_avg_loss: tensor(5.9655)
epoch: 95 train_loss: tensor(0.0209, grad_fn=<NllLossBackward>) average train loss tensor(0.0458, grad_fn=<DivBackward0>)
val_acc: 0.44432671081677705 val_avg_loss: tensor(6.0432)
epoch: 96 train_loss: tensor(0.0513, grad_fn=<NllLossBackward>) average train loss tensor(0.0435, grad_fn=<DivBackward0>)
val_acc: 0.44282560706401763 val_avg_loss: tensor(6.1172)
epoch: 97 train_loss: tensor(0.0308, grad_fn=<NllLossBackward>) average train loss tensor(0.0465, grad_fn=<DivBackward0>)
val_acc: 0.4394701986754967 val_avg_loss: tensor(6.1783)
epoch: 98 train_loss: tensor(0.0230, grad_fn=

In [55]:
autoencoder_128_3 = Autoencoder(d=128)
optimizer_128_3 = torch.optim.Adam(autoencoder_128_3.parameters(), lr=1e-3)

stat = fit_autoencoder(autoencoder_128_3, optimizer_128_3, 30, [x_img_train, x_txt_train], [x_img_val, x_txt_val])

train img loss: 0.691 txt_loss: 0.718 img + txt loss 1.408
val img loss: 0.491 val txt_loss: 0.536 img + txt loss 1.027
train img loss: 0.405 txt_loss: 0.463 img + txt loss 0.868
val img loss: 0.338 val txt_loss: 0.407 img + txt loss 0.745
train img loss: 0.300 txt_loss: 0.377 img + txt loss 0.677
val img loss: 0.271 val txt_loss: 0.353 img + txt loss 0.624
train img loss: 0.253 txt_loss: 0.338 img + txt loss 0.591
val img loss: 0.239 val txt_loss: 0.326 img + txt loss 0.565
train img loss: 0.228 txt_loss: 0.317 img + txt loss 0.545
val img loss: 0.220 val txt_loss: 0.309 img + txt loss 0.530
train img loss: 0.214 txt_loss: 0.302 img + txt loss 0.516
val img loss: 0.208 val txt_loss: 0.296 img + txt loss 0.504
train img loss: 0.203 txt_loss: 0.290 img + txt loss 0.493
val img loss: 0.199 val txt_loss: 0.285 img + txt loss 0.484
train img loss: 0.195 txt_loss: 0.279 img + txt loss 0.474
val img loss: 0.192 val txt_loss: 0.275 img + txt loss 0.467
train img loss: 0.189 txt_loss: 0.270 im

In [57]:
cur_train_loader = DataLoader(train_ds_2000, batch_size=2048)

encoder_128_3 = autoencoder_128_3.encoder
after_encoder_model_128_3 = AfterEncoderModel(encoder_128_3, d=128, drop=0.5)
optimizer_aem_128_3 = torch.optim.Adam(after_encoder_model_128_3.parameters(), lr=1e-3)
writer = SummaryWriter('runs/aem_128_3_bs2048_rs42_d128_wd0005_drop05_100')

torch_models.fit_topics_model(
    model=after_encoder_model_128_3,
    optimizer=optimizer_aem_128_3,
    epochs=100,
    writer=writer,
    train_loader=cur_train_loader,
    val_loader=val_loader
)

epoch: 0 train_loss: tensor(3.9509, grad_fn=<NllLossBackward>) average train loss tensor(3.9509, grad_fn=<DivBackward0>)
val_acc: 0.036467991169977926 val_avg_loss: tensor(3.8701)
epoch: 1 train_loss: tensor(3.8789, grad_fn=<NllLossBackward>) average train loss tensor(3.8789, grad_fn=<DivBackward0>)
val_acc: 0.05598233995584989 val_avg_loss: tensor(3.8264)
epoch: 2 train_loss: tensor(3.8290, grad_fn=<NllLossBackward>) average train loss tensor(3.8290, grad_fn=<DivBackward0>)
val_acc: 0.05757174392935983 val_avg_loss: tensor(3.7869)
epoch: 3 train_loss: tensor(3.7779, grad_fn=<NllLossBackward>) average train loss tensor(3.7779, grad_fn=<DivBackward0>)
val_acc: 0.06322295805739514 val_avg_loss: tensor(3.7439)
epoch: 4 train_loss: tensor(3.7325, grad_fn=<NllLossBackward>) average train loss tensor(3.7325, grad_fn=<DivBackward0>)
val_acc: 0.07620309050772626 val_avg_loss: tensor(3.6986)
epoch: 5 train_loss: tensor(3.6882, grad_fn=<NllLossBackward>) average train loss tensor(3.6882, grad_fn

val_acc: 0.44762325239146433 val_avg_loss: tensor(2.0949)
epoch: 46 train_loss: tensor(1.9664, grad_fn=<NllLossBackward>) average train loss tensor(1.9664, grad_fn=<DivBackward0>)
val_acc: 0.4511846946284032 val_avg_loss: tensor(2.0829)
epoch: 47 train_loss: tensor(1.9610, grad_fn=<NllLossBackward>) average train loss tensor(1.9610, grad_fn=<DivBackward0>)
val_acc: 0.453392200147167 val_avg_loss: tensor(2.0732)
epoch: 48 train_loss: tensor(1.9180, grad_fn=<NllLossBackward>) average train loss tensor(1.9180, grad_fn=<DivBackward0>)
val_acc: 0.4553936718175129 val_avg_loss: tensor(2.0637)
epoch: 49 train_loss: tensor(1.9221, grad_fn=<NllLossBackward>) average train loss tensor(1.9221, grad_fn=<DivBackward0>)
val_acc: 0.4576894775570272 val_avg_loss: tensor(2.0547)
epoch: 50 train_loss: tensor(1.8832, grad_fn=<NllLossBackward>) average train loss tensor(1.8832, grad_fn=<DivBackward0>)
val_acc: 0.45927888153053714 val_avg_loss: tensor(2.0467)
epoch: 51 train_loss: tensor(1.8521, grad_fn=<N

val_acc: 0.49200883002207507 val_avg_loss: tensor(1.9955)
epoch: 92 train_loss: tensor(1.2338, grad_fn=<NllLossBackward>) average train loss tensor(1.2338, grad_fn=<DivBackward0>)
val_acc: 0.4925386313465784 val_avg_loss: tensor(2.0019)
epoch: 93 train_loss: tensor(1.2201, grad_fn=<NllLossBackward>) average train loss tensor(1.2201, grad_fn=<DivBackward0>)
val_acc: 0.4923031640912436 val_avg_loss: tensor(2.0076)
epoch: 94 train_loss: tensor(1.2002, grad_fn=<NllLossBackward>) average train loss tensor(1.2002, grad_fn=<DivBackward0>)
val_acc: 0.4933627667402502 val_avg_loss: tensor(2.0133)
epoch: 95 train_loss: tensor(1.1906, grad_fn=<NllLossBackward>) average train loss tensor(1.1906, grad_fn=<DivBackward0>)
val_acc: 0.4919205298013245 val_avg_loss: tensor(2.0189)
epoch: 96 train_loss: tensor(1.1929, grad_fn=<NllLossBackward>) average train loss tensor(1.1929, grad_fn=<DivBackward0>)
val_acc: 0.49212656364974244 val_avg_loss: tensor(2.0243)
epoch: 97 train_loss: tensor(1.1816, grad_fn=<

In [59]:
torch_models.fit_topics_model(
    model=after_encoder_model_128_3,
    optimizer=optimizer_aem_128_3,
    epochs=100,
    writer=writer,
    train_loader=cur_train_loader,
    val_loader=val_loader
)

epoch: 0 train_loss: tensor(1.1750, grad_fn=<NllLossBackward>) average train loss tensor(1.1750, grad_fn=<DivBackward0>)
val_acc: 0.49642384105960263 val_avg_loss: tensor(2.0321)
epoch: 1 train_loss: tensor(1.1366, grad_fn=<NllLossBackward>) average train loss tensor(1.1366, grad_fn=<DivBackward0>)
val_acc: 0.4963944076526858 val_avg_loss: tensor(2.0331)
epoch: 2 train_loss: tensor(1.1363, grad_fn=<NllLossBackward>) average train loss tensor(1.1363, grad_fn=<DivBackward0>)
val_acc: 0.49630610743193526 val_avg_loss: tensor(2.0373)
epoch: 3 train_loss: tensor(1.1504, grad_fn=<NllLossBackward>) average train loss tensor(1.1504, grad_fn=<DivBackward0>)
val_acc: 0.4955114054451803 val_avg_loss: tensor(2.0423)
epoch: 4 train_loss: tensor(1.0970, grad_fn=<NllLossBackward>) average train loss tensor(1.0970, grad_fn=<DivBackward0>)
val_acc: 0.49562913907284767 val_avg_loss: tensor(2.0463)
epoch: 5 train_loss: tensor(1.1007, grad_fn=<NllLossBackward>) average train loss tensor(1.1007, grad_fn=<D

KeyboardInterrupt: 

### comparing autoencoders

In [61]:
constructor_dict = {
    'trivial': Autoencoder,
    'inter_fc': AutoencoderFixed,
    'relu': AutoencoderRelu,
    'complex': AutoencoderComplex
}

In [64]:
autoencoder_dict = {}

for k in constructor_dict:
    cur_autoencoder = constructor_dict[k](d=128)
    cur_optimizer = torch.optim.Adam(cur_autoencoder.parameters(), lr=1e-3)
    fit_autoencoder(
        cur_autoencoder, 
        cur_optimizer, 
        epochs=50, 
        X_train=[x_img_train, x_txt_train], 
        X_val=[x_img_val, x_txt_val]
    ) 
    autoencoder_dict[k] = cur_autoencoder

train img loss: 0.691 txt_loss: 0.715 img + txt loss 1.406
val img loss: 0.489 val txt_loss: 0.532 img + txt loss 1.021
train img loss: 0.404 txt_loss: 0.460 img + txt loss 0.864
val img loss: 0.338 val txt_loss: 0.405 img + txt loss 0.743
train img loss: 0.301 txt_loss: 0.376 img + txt loss 0.677
val img loss: 0.271 val txt_loss: 0.353 img + txt loss 0.624
train img loss: 0.253 txt_loss: 0.338 img + txt loss 0.591
val img loss: 0.239 val txt_loss: 0.326 img + txt loss 0.565
train img loss: 0.229 txt_loss: 0.317 img + txt loss 0.546
val img loss: 0.221 val txt_loss: 0.310 img + txt loss 0.531
train img loss: 0.214 txt_loss: 0.303 img + txt loss 0.517
val img loss: 0.209 val txt_loss: 0.297 img + txt loss 0.506
train img loss: 0.204 txt_loss: 0.291 img + txt loss 0.495
val img loss: 0.200 val txt_loss: 0.286 img + txt loss 0.486
train img loss: 0.196 txt_loss: 0.280 img + txt loss 0.476
val img loss: 0.193 val txt_loss: 0.275 img + txt loss 0.469
train img loss: 0.190 txt_loss: 0.270 im

train img loss: 0.161 txt_loss: 0.225 img + txt loss 0.386
val img loss: 0.161 val txt_loss: 0.225 img + txt loss 0.386
train img loss: 0.161 txt_loss: 0.225 img + txt loss 0.386
val img loss: 0.161 val txt_loss: 0.225 img + txt loss 0.386
train img loss: 0.161 txt_loss: 0.225 img + txt loss 0.386
val img loss: 0.161 val txt_loss: 0.225 img + txt loss 0.386
train img loss: 0.161 txt_loss: 0.225 img + txt loss 0.386
val img loss: 0.161 val txt_loss: 0.225 img + txt loss 0.386
train img loss: 0.161 txt_loss: 0.225 img + txt loss 0.386
val img loss: 0.161 val txt_loss: 0.225 img + txt loss 0.386
train img loss: 0.161 txt_loss: 0.225 img + txt loss 0.386
val img loss: 0.161 val txt_loss: 0.225 img + txt loss 0.386
train img loss: 0.161 txt_loss: 0.225 img + txt loss 0.386
val img loss: 0.161 val txt_loss: 0.225 img + txt loss 0.386
train img loss: 0.161 txt_loss: 0.225 img + txt loss 0.386
val img loss: 0.161 val txt_loss: 0.225 img + txt loss 0.386
train img loss: 0.161 txt_loss: 0.225 im

train img loss: 0.178 txt_loss: 0.231 img + txt loss 0.409
val img loss: 0.178 val txt_loss: 0.232 img + txt loss 0.410
train img loss: 0.178 txt_loss: 0.231 img + txt loss 0.409
val img loss: 0.178 val txt_loss: 0.232 img + txt loss 0.410
train img loss: 0.178 txt_loss: 0.231 img + txt loss 0.408
val img loss: 0.178 val txt_loss: 0.231 img + txt loss 0.410
train img loss: 0.178 txt_loss: 0.231 img + txt loss 0.408
val img loss: 0.178 val txt_loss: 0.232 img + txt loss 0.409
train img loss: 0.177 txt_loss: 0.230 img + txt loss 0.407
val img loss: 0.177 val txt_loss: 0.231 img + txt loss 0.408
train img loss: 0.177 txt_loss: 0.230 img + txt loss 0.407
val img loss: 0.177 val txt_loss: 0.233 img + txt loss 0.410
train img loss: 0.176 txt_loss: 0.230 img + txt loss 0.407
val img loss: 0.177 val txt_loss: 0.231 img + txt loss 0.408
train img loss: 0.177 txt_loss: 0.230 img + txt loss 0.407
val img loss: 0.176 val txt_loss: 0.230 img + txt loss 0.407
train img loss: 0.176 txt_loss: 0.230 im

In [69]:
for rs in range(1, 6):
    
    np.random.seed(rs)
    training_indices = np.random.randint(low=0, high=x_img_train.shape[0], size=2000)

    x_img_train_t_cur = torch.tensor(x_img_train[training_indices]).float()
    x_txt_train_t_cur = torch.tensor(x_txt_train[training_indices]).float()
    y_train_t_cur = torch.tensor(y_train[training_indices]).float()
    
    cur_train_ds = TensorDataset(x_img_train_t_cur, x_txt_train_t_cur, y_train_t_cur)
    cur_train_loader = DataLoader(cur_train_ds, batch_size=512)
    
    for autoencoder_name in constructor_dict:
        
        cur_autoencoder_2000 = copy.deepcopy(autoencoder_dict[autoencoder_name])
        cur_encoder_2000 = cur_autoencoder_2000.encoder
        
        cur_after_encoder_model_2000 = AfterEncoderModel(cur_encoder_2000, d=128, drop=0.5)
        cur_optimizer_aem_2000 = torch.optim.Adam(cur_after_encoder_model_2000.parameters(), lr=1e-3)
        
        writer = SummaryWriter('runs/aem_2000_' + autoencoder_name + '_' + str(rs) + '_bs2048_rs42_d128_wd0005_drop05_100')

        torch_models.fit_topics_model(
            model=cur_after_encoder_model_2000,
            optimizer=cur_optimizer_aem_2000,
            epochs=140,
            writer=writer,
            train_loader=cur_train_loader,
            val_loader=val_loader
        )  

epoch: 0 train_loss: tensor(3.8147, grad_fn=<NllLossBackward>) average train loss tensor(3.8888, grad_fn=<DivBackward0>)
val_acc: 0.0848270787343635 val_avg_loss: tensor(3.7951)
epoch: 1 train_loss: tensor(3.6779, grad_fn=<NllLossBackward>) average train loss tensor(3.7258, grad_fn=<DivBackward0>)
val_acc: 0.08906548933038999 val_avg_loss: tensor(3.6651)
epoch: 2 train_loss: tensor(3.5690, grad_fn=<NllLossBackward>) average train loss tensor(3.6159, grad_fn=<DivBackward0>)
val_acc: 0.1552317880794702 val_avg_loss: tensor(3.5585)
epoch: 3 train_loss: tensor(3.4350, grad_fn=<NllLossBackward>) average train loss tensor(3.5147, grad_fn=<DivBackward0>)
val_acc: 0.2025018395879323 val_avg_loss: tensor(3.4498)
epoch: 4 train_loss: tensor(3.3540, grad_fn=<NllLossBackward>) average train loss tensor(3.4012, grad_fn=<DivBackward0>)
val_acc: 0.2183664459161148 val_avg_loss: tensor(3.3228)
epoch: 5 train_loss: tensor(3.1595, grad_fn=<NllLossBackward>) average train loss tensor(3.2730, grad_fn=<Div

val_acc: 0.48844738778513613 val_avg_loss: tensor(2.0780)
epoch: 47 train_loss: tensor(1.1378, grad_fn=<NllLossBackward>) average train loss tensor(1.1343, grad_fn=<DivBackward0>)
val_acc: 0.4880353200883002 val_avg_loss: tensor(2.0854)
epoch: 48 train_loss: tensor(1.0077, grad_fn=<NllLossBackward>) average train loss tensor(1.1149, grad_fn=<DivBackward0>)
val_acc: 0.4844150110375276 val_avg_loss: tensor(2.1032)
epoch: 49 train_loss: tensor(0.9911, grad_fn=<NllLossBackward>) average train loss tensor(1.1127, grad_fn=<DivBackward0>)
val_acc: 0.48485651214128034 val_avg_loss: tensor(2.1015)
epoch: 50 train_loss: tensor(1.0222, grad_fn=<NllLossBackward>) average train loss tensor(1.1015, grad_fn=<DivBackward0>)
val_acc: 0.48850625459896985 val_avg_loss: tensor(2.0991)
epoch: 51 train_loss: tensor(1.0195, grad_fn=<NllLossBackward>) average train loss tensor(1.0674, grad_fn=<DivBackward0>)
val_acc: 0.48944812362030904 val_avg_loss: tensor(2.1156)
epoch: 52 train_loss: tensor(0.9973, grad_fn

val_acc: 0.4850331125827815 val_avg_loss: tensor(2.5302)
epoch: 93 train_loss: tensor(0.5719, grad_fn=<NllLossBackward>) average train loss tensor(0.6456, grad_fn=<DivBackward0>)
val_acc: 0.48353200883002206 val_avg_loss: tensor(2.5595)
epoch: 94 train_loss: tensor(0.6515, grad_fn=<NllLossBackward>) average train loss tensor(0.6449, grad_fn=<DivBackward0>)
val_acc: 0.4835614422369389 val_avg_loss: tensor(2.5710)
epoch: 95 train_loss: tensor(0.5394, grad_fn=<NllLossBackward>) average train loss tensor(0.6228, grad_fn=<DivBackward0>)
val_acc: 0.48606328182487124 val_avg_loss: tensor(2.5694)
epoch: 96 train_loss: tensor(0.5521, grad_fn=<NllLossBackward>) average train loss tensor(0.6499, grad_fn=<DivBackward0>)
val_acc: 0.48264900662251653 val_avg_loss: tensor(2.5730)
epoch: 97 train_loss: tensor(0.5014, grad_fn=<NllLossBackward>) average train loss tensor(0.6081, grad_fn=<DivBackward0>)
val_acc: 0.48129506990434145 val_avg_loss: tensor(2.5706)
epoch: 98 train_loss: tensor(0.5678, grad_fn

val_acc: 0.4797350993377483 val_avg_loss: tensor(3.0016)
epoch: 139 train_loss: tensor(0.4137, grad_fn=<NllLossBackward>) average train loss tensor(0.4401, grad_fn=<DivBackward0>)
val_acc: 0.48108903605592346 val_avg_loss: tensor(2.9988)
epoch: 0 train_loss: tensor(3.8626, grad_fn=<NllLossBackward>) average train loss tensor(3.8913, grad_fn=<DivBackward0>)
val_acc: 0.1012214863870493 val_avg_loss: tensor(3.8480)
epoch: 1 train_loss: tensor(3.7496, grad_fn=<NllLossBackward>) average train loss tensor(3.7900, grad_fn=<DivBackward0>)
val_acc: 0.13495217071376012 val_avg_loss: tensor(3.7369)
epoch: 2 train_loss: tensor(3.5519, grad_fn=<NllLossBackward>) average train loss tensor(3.6285, grad_fn=<DivBackward0>)
val_acc: 0.1316850625459897 val_avg_loss: tensor(3.5469)
epoch: 3 train_loss: tensor(3.3690, grad_fn=<NllLossBackward>) average train loss tensor(3.4599, grad_fn=<DivBackward0>)
val_acc: 0.15793966151582045 val_avg_loss: tensor(3.3940)
epoch: 4 train_loss: tensor(3.1988, grad_fn=<Nll

val_acc: 0.4696983075791023 val_avg_loss: tensor(2.9370)
epoch: 45 train_loss: tensor(0.4744, grad_fn=<NllLossBackward>) average train loss tensor(0.5237, grad_fn=<DivBackward0>)
val_acc: 0.46545989698307577 val_avg_loss: tensor(2.9811)
epoch: 46 train_loss: tensor(0.4756, grad_fn=<NllLossBackward>) average train loss tensor(0.5118, grad_fn=<DivBackward0>)
val_acc: 0.4643708609271523 val_avg_loss: tensor(2.9735)
epoch: 47 train_loss: tensor(0.4558, grad_fn=<NllLossBackward>) average train loss tensor(0.4831, grad_fn=<DivBackward0>)
val_acc: 0.46707873436350256 val_avg_loss: tensor(3.0255)
epoch: 48 train_loss: tensor(0.4473, grad_fn=<NllLossBackward>) average train loss tensor(0.4714, grad_fn=<DivBackward0>)
val_acc: 0.4694922737306843 val_avg_loss: tensor(3.0477)
epoch: 49 train_loss: tensor(0.4601, grad_fn=<NllLossBackward>) average train loss tensor(0.4692, grad_fn=<DivBackward0>)
val_acc: 0.4649595290654893 val_avg_loss: tensor(3.1071)
epoch: 50 train_loss: tensor(0.3913, grad_fn=<

val_acc: 0.45398086828550405 val_avg_loss: tensor(4.3483)
epoch: 91 train_loss: tensor(0.1832, grad_fn=<NllLossBackward>) average train loss tensor(0.1885, grad_fn=<DivBackward0>)
val_acc: 0.4559823399558499 val_avg_loss: tensor(4.3706)
epoch: 92 train_loss: tensor(0.1928, grad_fn=<NllLossBackward>) average train loss tensor(0.1960, grad_fn=<DivBackward0>)
val_acc: 0.4544812362030905 val_avg_loss: tensor(4.4083)
epoch: 93 train_loss: tensor(0.1831, grad_fn=<NllLossBackward>) average train loss tensor(0.1925, grad_fn=<DivBackward0>)
val_acc: 0.4533038999264165 val_avg_loss: tensor(4.4273)
epoch: 94 train_loss: tensor(0.1378, grad_fn=<NllLossBackward>) average train loss tensor(0.1867, grad_fn=<DivBackward0>)
val_acc: 0.4529212656364974 val_avg_loss: tensor(4.4712)
epoch: 95 train_loss: tensor(0.1954, grad_fn=<NllLossBackward>) average train loss tensor(0.1746, grad_fn=<DivBackward0>)
val_acc: 0.45038999264164825 val_avg_loss: tensor(4.4507)
epoch: 96 train_loss: tensor(0.1448, grad_fn=<

val_acc: 0.45159676232523915 val_avg_loss: tensor(5.3114)
epoch: 137 train_loss: tensor(0.0750, grad_fn=<NllLossBackward>) average train loss tensor(0.1037, grad_fn=<DivBackward0>)
val_acc: 0.45153789551140544 val_avg_loss: tensor(5.3295)
epoch: 138 train_loss: tensor(0.1094, grad_fn=<NllLossBackward>) average train loss tensor(0.1047, grad_fn=<DivBackward0>)
val_acc: 0.44986019131714494 val_avg_loss: tensor(5.3397)
epoch: 139 train_loss: tensor(0.0952, grad_fn=<NllLossBackward>) average train loss tensor(0.1027, grad_fn=<DivBackward0>)
val_acc: 0.449271523178808 val_avg_loss: tensor(5.3887)
epoch: 0 train_loss: tensor(3.7994, grad_fn=<NllLossBackward>) average train loss tensor(3.8983, grad_fn=<DivBackward0>)
val_acc: 0.08803532008830021 val_avg_loss: tensor(3.7558)
epoch: 1 train_loss: tensor(3.6780, grad_fn=<NllLossBackward>) average train loss tensor(3.7378, grad_fn=<DivBackward0>)
val_acc: 0.07579102281089035 val_avg_loss: tensor(3.6553)
epoch: 2 train_loss: tensor(3.6011, grad_fn

val_acc: 0.4657542310522443 val_avg_loss: tensor(2.6130)
epoch: 43 train_loss: tensor(0.6575, grad_fn=<NllLossBackward>) average train loss tensor(0.7042, grad_fn=<DivBackward0>)
val_acc: 0.4603679175864606 val_avg_loss: tensor(2.6708)
epoch: 44 train_loss: tensor(0.6693, grad_fn=<NllLossBackward>) average train loss tensor(0.6891, grad_fn=<DivBackward0>)
val_acc: 0.46278145695364237 val_avg_loss: tensor(2.7573)
epoch: 45 train_loss: tensor(0.6130, grad_fn=<NllLossBackward>) average train loss tensor(0.6710, grad_fn=<DivBackward0>)
val_acc: 0.4624282560706402 val_avg_loss: tensor(2.7753)
epoch: 46 train_loss: tensor(0.5786, grad_fn=<NllLossBackward>) average train loss tensor(0.6311, grad_fn=<DivBackward0>)
val_acc: 0.45789551140544515 val_avg_loss: tensor(2.7998)
epoch: 47 train_loss: tensor(0.5894, grad_fn=<NllLossBackward>) average train loss tensor(0.6384, grad_fn=<DivBackward0>)
val_acc: 0.46125091979396615 val_avg_loss: tensor(2.8218)
epoch: 48 train_loss: tensor(0.4750, grad_fn=

val_acc: 0.44282560706401763 val_avg_loss: tensor(4.3616)
epoch: 89 train_loss: tensor(0.1525, grad_fn=<NllLossBackward>) average train loss tensor(0.1697, grad_fn=<DivBackward0>)
val_acc: 0.44182487122884473 val_avg_loss: tensor(4.4300)
epoch: 90 train_loss: tensor(0.1881, grad_fn=<NllLossBackward>) average train loss tensor(0.1753, grad_fn=<DivBackward0>)
val_acc: 0.4438852097130243 val_avg_loss: tensor(4.4808)
epoch: 91 train_loss: tensor(0.1771, grad_fn=<NllLossBackward>) average train loss tensor(0.1586, grad_fn=<DivBackward0>)
val_acc: 0.444532744665195 val_avg_loss: tensor(4.5680)
epoch: 92 train_loss: tensor(0.1600, grad_fn=<NllLossBackward>) average train loss tensor(0.1650, grad_fn=<DivBackward0>)
val_acc: 0.4464459161147903 val_avg_loss: tensor(4.6406)
epoch: 93 train_loss: tensor(0.1390, grad_fn=<NllLossBackward>) average train loss tensor(0.1600, grad_fn=<DivBackward0>)
val_acc: 0.445121412803532 val_avg_loss: tensor(4.6056)
epoch: 94 train_loss: tensor(0.1395, grad_fn=<Nl

val_acc: 0.445121412803532 val_avg_loss: tensor(5.3603)
epoch: 135 train_loss: tensor(0.0859, grad_fn=<NllLossBackward>) average train loss tensor(0.0812, grad_fn=<DivBackward0>)
val_acc: 0.4440029433406917 val_avg_loss: tensor(5.4543)
epoch: 136 train_loss: tensor(0.0791, grad_fn=<NllLossBackward>) average train loss tensor(0.0911, grad_fn=<DivBackward0>)
val_acc: 0.4442384105960265 val_avg_loss: tensor(5.4508)
epoch: 137 train_loss: tensor(0.0634, grad_fn=<NllLossBackward>) average train loss tensor(0.0697, grad_fn=<DivBackward0>)
val_acc: 0.44179543782192787 val_avg_loss: tensor(5.4740)
epoch: 138 train_loss: tensor(0.1358, grad_fn=<NllLossBackward>) average train loss tensor(0.0892, grad_fn=<DivBackward0>)
val_acc: 0.4398822663723326 val_avg_loss: tensor(5.5204)
epoch: 139 train_loss: tensor(0.0699, grad_fn=<NllLossBackward>) average train loss tensor(0.0827, grad_fn=<DivBackward0>)
val_acc: 0.4429727740986019 val_avg_loss: tensor(5.4941)
epoch: 0 train_loss: tensor(3.8323, grad_fn

val_acc: 0.4886534216335541 val_avg_loss: tensor(2.0096)
epoch: 41 train_loss: tensor(1.2651, grad_fn=<NllLossBackward>) average train loss tensor(1.2989, grad_fn=<DivBackward0>)
val_acc: 0.48933038999264167 val_avg_loss: tensor(2.0112)
epoch: 42 train_loss: tensor(1.1944, grad_fn=<NllLossBackward>) average train loss tensor(1.2660, grad_fn=<DivBackward0>)
val_acc: 0.49059602649006623 val_avg_loss: tensor(2.0150)
epoch: 43 train_loss: tensor(1.2160, grad_fn=<NllLossBackward>) average train loss tensor(1.2241, grad_fn=<DivBackward0>)
val_acc: 0.48944812362030904 val_avg_loss: tensor(2.0351)
epoch: 44 train_loss: tensor(1.1471, grad_fn=<NllLossBackward>) average train loss tensor(1.1922, grad_fn=<DivBackward0>)
val_acc: 0.4906254598969831 val_avg_loss: tensor(2.0422)
epoch: 45 train_loss: tensor(1.1645, grad_fn=<NllLossBackward>) average train loss tensor(1.1704, grad_fn=<DivBackward0>)
val_acc: 0.4920971302428256 val_avg_loss: tensor(2.0463)
epoch: 46 train_loss: tensor(1.0362, grad_fn=

val_acc: 0.48950699043414275 val_avg_loss: tensor(2.4444)
epoch: 87 train_loss: tensor(0.6564, grad_fn=<NllLossBackward>) average train loss tensor(0.7119, grad_fn=<DivBackward0>)
val_acc: 0.4860044150110375 val_avg_loss: tensor(2.4446)
epoch: 88 train_loss: tensor(0.6234, grad_fn=<NllLossBackward>) average train loss tensor(0.6913, grad_fn=<DivBackward0>)
val_acc: 0.4846504782928624 val_avg_loss: tensor(2.4466)
epoch: 89 train_loss: tensor(0.5705, grad_fn=<NllLossBackward>) average train loss tensor(0.6530, grad_fn=<DivBackward0>)
val_acc: 0.4906843267108168 val_avg_loss: tensor(2.4560)
epoch: 90 train_loss: tensor(0.6421, grad_fn=<NllLossBackward>) average train loss tensor(0.6803, grad_fn=<DivBackward0>)
val_acc: 0.48821192052980134 val_avg_loss: tensor(2.4823)
epoch: 91 train_loss: tensor(0.6347, grad_fn=<NllLossBackward>) average train loss tensor(0.6623, grad_fn=<DivBackward0>)
val_acc: 0.48706401766004415 val_avg_loss: tensor(2.4795)
epoch: 92 train_loss: tensor(0.5389, grad_fn=

val_acc: 0.4837969094922737 val_avg_loss: tensor(2.8293)
epoch: 133 train_loss: tensor(0.3930, grad_fn=<NllLossBackward>) average train loss tensor(0.4342, grad_fn=<DivBackward0>)
val_acc: 0.4870345842531273 val_avg_loss: tensor(2.8229)
epoch: 134 train_loss: tensor(0.4037, grad_fn=<NllLossBackward>) average train loss tensor(0.4462, grad_fn=<DivBackward0>)
val_acc: 0.488476821192053 val_avg_loss: tensor(2.8247)
epoch: 135 train_loss: tensor(0.4586, grad_fn=<NllLossBackward>) average train loss tensor(0.4325, grad_fn=<DivBackward0>)
val_acc: 0.4855629139072848 val_avg_loss: tensor(2.8603)
epoch: 136 train_loss: tensor(0.4148, grad_fn=<NllLossBackward>) average train loss tensor(0.4332, grad_fn=<DivBackward0>)
val_acc: 0.4830316409124356 val_avg_loss: tensor(2.8973)
epoch: 137 train_loss: tensor(0.4104, grad_fn=<NllLossBackward>) average train loss tensor(0.4360, grad_fn=<DivBackward0>)
val_acc: 0.4827373068432671 val_avg_loss: tensor(2.9231)
epoch: 138 train_loss: tensor(0.4465, grad_f

val_acc: 0.4795879323031641 val_avg_loss: tensor(2.0439)
epoch: 39 train_loss: tensor(1.2524, grad_fn=<NllLossBackward>) average train loss tensor(1.3256, grad_fn=<DivBackward0>)
val_acc: 0.48100073583517294 val_avg_loss: tensor(2.0420)
epoch: 40 train_loss: tensor(1.1821, grad_fn=<NllLossBackward>) average train loss tensor(1.3137, grad_fn=<DivBackward0>)
val_acc: 0.48264900662251653 val_avg_loss: tensor(2.0544)
epoch: 41 train_loss: tensor(1.1609, grad_fn=<NllLossBackward>) average train loss tensor(1.3110, grad_fn=<DivBackward0>)
val_acc: 0.48132450331125826 val_avg_loss: tensor(2.0645)
epoch: 42 train_loss: tensor(1.1748, grad_fn=<NllLossBackward>) average train loss tensor(1.2772, grad_fn=<DivBackward0>)
val_acc: 0.48 val_avg_loss: tensor(2.0719)
epoch: 43 train_loss: tensor(1.1432, grad_fn=<NllLossBackward>) average train loss tensor(1.2380, grad_fn=<DivBackward0>)
val_acc: 0.4819131714495953 val_avg_loss: tensor(2.0742)
epoch: 44 train_loss: tensor(1.1282, grad_fn=<NllLossBackwa

val_acc: 0.47979396615158204 val_avg_loss: tensor(2.4878)
epoch: 85 train_loss: tensor(0.6517, grad_fn=<NllLossBackward>) average train loss tensor(0.7097, grad_fn=<DivBackward0>)
val_acc: 0.4810596026490066 val_avg_loss: tensor(2.4990)
epoch: 86 train_loss: tensor(0.6316, grad_fn=<NllLossBackward>) average train loss tensor(0.6929, grad_fn=<DivBackward0>)
val_acc: 0.4823841059602649 val_avg_loss: tensor(2.4862)
epoch: 87 train_loss: tensor(0.5983, grad_fn=<NllLossBackward>) average train loss tensor(0.6860, grad_fn=<DivBackward0>)
val_acc: 0.4798822663723326 val_avg_loss: tensor(2.4987)
epoch: 88 train_loss: tensor(0.5855, grad_fn=<NllLossBackward>) average train loss tensor(0.6727, grad_fn=<DivBackward0>)
val_acc: 0.47902869757174393 val_avg_loss: tensor(2.5231)
epoch: 89 train_loss: tensor(0.6207, grad_fn=<NllLossBackward>) average train loss tensor(0.6781, grad_fn=<DivBackward0>)
val_acc: 0.482560706401766 val_avg_loss: tensor(2.5279)
epoch: 90 train_loss: tensor(0.6737, grad_fn=<N

val_acc: 0.46961000735835173 val_avg_loss: tensor(2.9527)
epoch: 131 train_loss: tensor(0.4592, grad_fn=<NllLossBackward>) average train loss tensor(0.4495, grad_fn=<DivBackward0>)
val_acc: 0.47167034584253126 val_avg_loss: tensor(2.9659)
epoch: 132 train_loss: tensor(0.4046, grad_fn=<NllLossBackward>) average train loss tensor(0.4709, grad_fn=<DivBackward0>)
val_acc: 0.47508462104488597 val_avg_loss: tensor(2.9537)
epoch: 133 train_loss: tensor(0.4145, grad_fn=<NllLossBackward>) average train loss tensor(0.4357, grad_fn=<DivBackward0>)
val_acc: 0.47152317880794703 val_avg_loss: tensor(2.9444)
epoch: 134 train_loss: tensor(0.3938, grad_fn=<NllLossBackward>) average train loss tensor(0.4402, grad_fn=<DivBackward0>)
val_acc: 0.473701250919794 val_avg_loss: tensor(2.9433)
epoch: 135 train_loss: tensor(0.3996, grad_fn=<NllLossBackward>) average train loss tensor(0.4440, grad_fn=<DivBackward0>)
val_acc: 0.4765268579838116 val_avg_loss: tensor(2.9601)
epoch: 136 train_loss: tensor(0.3866, gr

val_acc: 0.46722590139808684 val_avg_loss: tensor(2.7182)
epoch: 37 train_loss: tensor(0.6171, grad_fn=<NllLossBackward>) average train loss tensor(0.6831, grad_fn=<DivBackward0>)
val_acc: 0.4661074319352465 val_avg_loss: tensor(2.7670)
epoch: 38 train_loss: tensor(0.5602, grad_fn=<NllLossBackward>) average train loss tensor(0.6169, grad_fn=<DivBackward0>)
val_acc: 0.4639587932303164 val_avg_loss: tensor(2.8160)
epoch: 39 train_loss: tensor(0.5850, grad_fn=<NllLossBackward>) average train loss tensor(0.6284, grad_fn=<DivBackward0>)
val_acc: 0.46507726269315675 val_avg_loss: tensor(2.8525)
epoch: 40 train_loss: tensor(0.5292, grad_fn=<NllLossBackward>) average train loss tensor(0.6053, grad_fn=<DivBackward0>)
val_acc: 0.4679028697571744 val_avg_loss: tensor(2.8764)
epoch: 41 train_loss: tensor(0.4963, grad_fn=<NllLossBackward>) average train loss tensor(0.5789, grad_fn=<DivBackward0>)
val_acc: 0.4687270051508462 val_avg_loss: tensor(2.9231)
epoch: 42 train_loss: tensor(0.5017, grad_fn=<

val_acc: 0.4534805003679176 val_avg_loss: tensor(4.2753)
epoch: 83 train_loss: tensor(0.1778, grad_fn=<NllLossBackward>) average train loss tensor(0.2124, grad_fn=<DivBackward0>)
val_acc: 0.45321559970566594 val_avg_loss: tensor(4.2991)
epoch: 84 train_loss: tensor(0.1536, grad_fn=<NllLossBackward>) average train loss tensor(0.2098, grad_fn=<DivBackward0>)
val_acc: 0.45574687270051506 val_avg_loss: tensor(4.2819)
epoch: 85 train_loss: tensor(0.1893, grad_fn=<NllLossBackward>) average train loss tensor(0.2032, grad_fn=<DivBackward0>)
val_acc: 0.4537748344370861 val_avg_loss: tensor(4.3075)
epoch: 86 train_loss: tensor(0.1778, grad_fn=<NllLossBackward>) average train loss tensor(0.2056, grad_fn=<DivBackward0>)
val_acc: 0.4519499632082414 val_avg_loss: tensor(4.3716)
epoch: 87 train_loss: tensor(0.2154, grad_fn=<NllLossBackward>) average train loss tensor(0.2284, grad_fn=<DivBackward0>)
val_acc: 0.4475938189845475 val_avg_loss: tensor(4.3869)
epoch: 88 train_loss: tensor(0.2005, grad_fn=<

val_acc: 0.450242825607064 val_avg_loss: tensor(5.2613)
epoch: 129 train_loss: tensor(0.1150, grad_fn=<NllLossBackward>) average train loss tensor(0.1235, grad_fn=<DivBackward0>)
val_acc: 0.4458278145695364 val_avg_loss: tensor(5.2755)
epoch: 130 train_loss: tensor(0.0665, grad_fn=<NllLossBackward>) average train loss tensor(0.0964, grad_fn=<DivBackward0>)
val_acc: 0.44432671081677705 val_avg_loss: tensor(5.3421)
epoch: 131 train_loss: tensor(0.1049, grad_fn=<NllLossBackward>) average train loss tensor(0.1163, grad_fn=<DivBackward0>)
val_acc: 0.44456217807211185 val_avg_loss: tensor(5.3718)
epoch: 132 train_loss: tensor(0.0968, grad_fn=<NllLossBackward>) average train loss tensor(0.0974, grad_fn=<DivBackward0>)
val_acc: 0.44653421633554086 val_avg_loss: tensor(5.3769)
epoch: 133 train_loss: tensor(0.0856, grad_fn=<NllLossBackward>) average train loss tensor(0.1069, grad_fn=<DivBackward0>)
val_acc: 0.4469168506254599 val_avg_loss: tensor(5.3753)
epoch: 134 train_loss: tensor(0.1247, gra

val_acc: 0.4625754231052244 val_avg_loss: tensor(2.3751)
epoch: 35 train_loss: tensor(0.8486, grad_fn=<NllLossBackward>) average train loss tensor(0.9529, grad_fn=<DivBackward0>)
val_acc: 0.4635172921265637 val_avg_loss: tensor(2.4049)
epoch: 36 train_loss: tensor(0.8491, grad_fn=<NllLossBackward>) average train loss tensor(0.9033, grad_fn=<DivBackward0>)
val_acc: 0.4609271523178808 val_avg_loss: tensor(2.4526)
epoch: 37 train_loss: tensor(0.8279, grad_fn=<NllLossBackward>) average train loss tensor(0.8717, grad_fn=<DivBackward0>)
val_acc: 0.45880794701986755 val_avg_loss: tensor(2.5134)
epoch: 38 train_loss: tensor(0.7713, grad_fn=<NllLossBackward>) average train loss tensor(0.8388, grad_fn=<DivBackward0>)
val_acc: 0.45948491537895514 val_avg_loss: tensor(2.5560)
epoch: 39 train_loss: tensor(0.6857, grad_fn=<NllLossBackward>) average train loss tensor(0.8134, grad_fn=<DivBackward0>)
val_acc: 0.45933774834437086 val_avg_loss: tensor(2.5742)
epoch: 40 train_loss: tensor(0.6864, grad_fn=

val_acc: 0.4370272259013981 val_avg_loss: tensor(4.2051)
epoch: 81 train_loss: tensor(0.1557, grad_fn=<NllLossBackward>) average train loss tensor(0.1972, grad_fn=<DivBackward0>)
val_acc: 0.4376158940397351 val_avg_loss: tensor(4.1826)
epoch: 82 train_loss: tensor(0.1606, grad_fn=<NllLossBackward>) average train loss tensor(0.1953, grad_fn=<DivBackward0>)
val_acc: 0.4403532008830022 val_avg_loss: tensor(4.2126)
epoch: 83 train_loss: tensor(0.1745, grad_fn=<NllLossBackward>) average train loss tensor(0.1872, grad_fn=<DivBackward0>)
val_acc: 0.4410007358351729 val_avg_loss: tensor(4.2356)
epoch: 84 train_loss: tensor(0.1302, grad_fn=<NllLossBackward>) average train loss tensor(0.1643, grad_fn=<DivBackward0>)
val_acc: 0.43858719646799116 val_avg_loss: tensor(4.2622)
epoch: 85 train_loss: tensor(0.1800, grad_fn=<NllLossBackward>) average train loss tensor(0.1915, grad_fn=<DivBackward0>)
val_acc: 0.43788079470198676 val_avg_loss: tensor(4.2979)
epoch: 86 train_loss: tensor(0.1509, grad_fn=<

val_acc: 0.43740986019131717 val_avg_loss: tensor(5.1584)
epoch: 127 train_loss: tensor(0.1134, grad_fn=<NllLossBackward>) average train loss tensor(0.1082, grad_fn=<DivBackward0>)
val_acc: 0.43573215599705667 val_avg_loss: tensor(5.2414)
epoch: 128 train_loss: tensor(0.0786, grad_fn=<NllLossBackward>) average train loss tensor(0.0857, grad_fn=<DivBackward0>)
val_acc: 0.4363208241353937 val_avg_loss: tensor(5.2548)
epoch: 129 train_loss: tensor(0.0783, grad_fn=<NllLossBackward>) average train loss tensor(0.0736, grad_fn=<DivBackward0>)
val_acc: 0.4364679911699779 val_avg_loss: tensor(5.2932)
epoch: 130 train_loss: tensor(0.0643, grad_fn=<NllLossBackward>) average train loss tensor(0.0815, grad_fn=<DivBackward0>)
val_acc: 0.4372626931567329 val_avg_loss: tensor(5.3277)
epoch: 131 train_loss: tensor(0.0633, grad_fn=<NllLossBackward>) average train loss tensor(0.0818, grad_fn=<DivBackward0>)
val_acc: 0.43690949227373066 val_avg_loss: tensor(5.3900)
epoch: 132 train_loss: tensor(0.0557, gr

val_acc: 0.4779690949227373 val_avg_loss: tensor(2.0160)
epoch: 33 train_loss: tensor(1.3552, grad_fn=<NllLossBackward>) average train loss tensor(1.4148, grad_fn=<DivBackward0>)
val_acc: 0.4757615894039735 val_avg_loss: tensor(2.0275)
epoch: 34 train_loss: tensor(1.3257, grad_fn=<NllLossBackward>) average train loss tensor(1.4238, grad_fn=<DivBackward0>)
val_acc: 0.4785871964679912 val_avg_loss: tensor(2.0258)
epoch: 35 train_loss: tensor(1.2908, grad_fn=<NllLossBackward>) average train loss tensor(1.3996, grad_fn=<DivBackward0>)
val_acc: 0.47846946284032377 val_avg_loss: tensor(2.0281)
epoch: 36 train_loss: tensor(1.2750, grad_fn=<NllLossBackward>) average train loss tensor(1.3577, grad_fn=<DivBackward0>)
val_acc: 0.4805298013245033 val_avg_loss: tensor(2.0323)
epoch: 37 train_loss: tensor(1.2275, grad_fn=<NllLossBackward>) average train loss tensor(1.3371, grad_fn=<DivBackward0>)
val_acc: 0.48264900662251653 val_avg_loss: tensor(2.0368)
epoch: 38 train_loss: tensor(1.2299, grad_fn=<

val_acc: 0.479205298013245 val_avg_loss: tensor(2.4120)
epoch: 79 train_loss: tensor(0.6769, grad_fn=<NllLossBackward>) average train loss tensor(0.7343, grad_fn=<DivBackward0>)
val_acc: 0.4760853568800589 val_avg_loss: tensor(2.4381)
epoch: 80 train_loss: tensor(0.6396, grad_fn=<NllLossBackward>) average train loss tensor(0.7094, grad_fn=<DivBackward0>)
val_acc: 0.47673289183222956 val_avg_loss: tensor(2.4666)
epoch: 81 train_loss: tensor(0.7241, grad_fn=<NllLossBackward>) average train loss tensor(0.7540, grad_fn=<DivBackward0>)
val_acc: 0.47864606328182485 val_avg_loss: tensor(2.4840)
epoch: 82 train_loss: tensor(0.6793, grad_fn=<NllLossBackward>) average train loss tensor(0.7270, grad_fn=<DivBackward0>)
val_acc: 0.48032376747608535 val_avg_loss: tensor(2.4923)
epoch: 83 train_loss: tensor(0.6688, grad_fn=<NllLossBackward>) average train loss tensor(0.7379, grad_fn=<DivBackward0>)
val_acc: 0.4764679911699779 val_avg_loss: tensor(2.5110)
epoch: 84 train_loss: tensor(0.6410, grad_fn=<

val_acc: 0.4736129506990434 val_avg_loss: tensor(2.9243)
epoch: 125 train_loss: tensor(0.3358, grad_fn=<NllLossBackward>) average train loss tensor(0.4830, grad_fn=<DivBackward0>)
val_acc: 0.4718469462840324 val_avg_loss: tensor(2.9398)
epoch: 126 train_loss: tensor(0.4366, grad_fn=<NllLossBackward>) average train loss tensor(0.4932, grad_fn=<DivBackward0>)
val_acc: 0.47013980868285504 val_avg_loss: tensor(2.9339)
epoch: 127 train_loss: tensor(0.3655, grad_fn=<NllLossBackward>) average train loss tensor(0.4684, grad_fn=<DivBackward0>)
val_acc: 0.469168506254599 val_avg_loss: tensor(2.9238)
epoch: 128 train_loss: tensor(0.4083, grad_fn=<NllLossBackward>) average train loss tensor(0.4843, grad_fn=<DivBackward0>)
val_acc: 0.47181751287711554 val_avg_loss: tensor(2.9535)
epoch: 129 train_loss: tensor(0.3882, grad_fn=<NllLossBackward>) average train loss tensor(0.4504, grad_fn=<DivBackward0>)
val_acc: 0.4723767476085357 val_avg_loss: tensor(2.9844)
epoch: 130 train_loss: tensor(0.3968, grad

val_acc: 0.47543782192788814 val_avg_loss: tensor(2.0098)
epoch: 31 train_loss: tensor(1.3598, grad_fn=<NllLossBackward>) average train loss tensor(1.5003, grad_fn=<DivBackward0>)
val_acc: 0.47579102281089036 val_avg_loss: tensor(2.0117)
epoch: 32 train_loss: tensor(1.4138, grad_fn=<NllLossBackward>) average train loss tensor(1.5334, grad_fn=<DivBackward0>)
val_acc: 0.47443708609271523 val_avg_loss: tensor(2.0195)
epoch: 33 train_loss: tensor(1.3916, grad_fn=<NllLossBackward>) average train loss tensor(1.4834, grad_fn=<DivBackward0>)
val_acc: 0.4777336276674025 val_avg_loss: tensor(2.0111)
epoch: 34 train_loss: tensor(1.3915, grad_fn=<NllLossBackward>) average train loss tensor(1.4596, grad_fn=<DivBackward0>)
val_acc: 0.4789109639440765 val_avg_loss: tensor(2.0156)
epoch: 35 train_loss: tensor(1.2922, grad_fn=<NllLossBackward>) average train loss tensor(1.3998, grad_fn=<DivBackward0>)
val_acc: 0.47664459161147904 val_avg_loss: tensor(2.0283)
epoch: 36 train_loss: tensor(1.3101, grad_fn

val_acc: 0.478616629874908 val_avg_loss: tensor(2.3909)
epoch: 77 train_loss: tensor(0.7989, grad_fn=<NllLossBackward>) average train loss tensor(0.8366, grad_fn=<DivBackward0>)
val_acc: 0.48229580573951436 val_avg_loss: tensor(2.3860)
epoch: 78 train_loss: tensor(0.7090, grad_fn=<NllLossBackward>) average train loss tensor(0.7965, grad_fn=<DivBackward0>)
val_acc: 0.4816777041942605 val_avg_loss: tensor(2.3999)
epoch: 79 train_loss: tensor(0.7760, grad_fn=<NllLossBackward>) average train loss tensor(0.8089, grad_fn=<DivBackward0>)
val_acc: 0.47855776306107434 val_avg_loss: tensor(2.4014)
epoch: 80 train_loss: tensor(0.7165, grad_fn=<NllLossBackward>) average train loss tensor(0.7628, grad_fn=<DivBackward0>)
val_acc: 0.4780573951434879 val_avg_loss: tensor(2.4125)
epoch: 81 train_loss: tensor(0.7574, grad_fn=<NllLossBackward>) average train loss tensor(0.7761, grad_fn=<DivBackward0>)
val_acc: 0.47917586460632816 val_avg_loss: tensor(2.4220)
epoch: 82 train_loss: tensor(0.7232, grad_fn=<

val_acc: 0.47381898454746135 val_avg_loss: tensor(2.8366)
epoch: 123 train_loss: tensor(0.5214, grad_fn=<NllLossBackward>) average train loss tensor(0.5186, grad_fn=<DivBackward0>)
val_acc: 0.47411331861662986 val_avg_loss: tensor(2.8755)
epoch: 124 train_loss: tensor(0.4380, grad_fn=<NllLossBackward>) average train loss tensor(0.5240, grad_fn=<DivBackward0>)
val_acc: 0.4699337748344371 val_avg_loss: tensor(2.8821)
epoch: 125 train_loss: tensor(0.5296, grad_fn=<NllLossBackward>) average train loss tensor(0.5361, grad_fn=<DivBackward0>)
val_acc: 0.46937454010301694 val_avg_loss: tensor(2.8689)
epoch: 126 train_loss: tensor(0.4964, grad_fn=<NllLossBackward>) average train loss tensor(0.5174, grad_fn=<DivBackward0>)
val_acc: 0.4724944812362031 val_avg_loss: tensor(2.8714)
epoch: 127 train_loss: tensor(0.4766, grad_fn=<NllLossBackward>) average train loss tensor(0.5119, grad_fn=<DivBackward0>)
val_acc: 0.47264164827078736 val_avg_loss: tensor(2.8723)
epoch: 128 train_loss: tensor(0.4155, g

val_acc: 0.4719352465047829 val_avg_loss: tensor(2.3368)
epoch: 29 train_loss: tensor(0.9584, grad_fn=<NllLossBackward>) average train loss tensor(0.9969, grad_fn=<DivBackward0>)
val_acc: 0.4700515084621045 val_avg_loss: tensor(2.3663)
epoch: 30 train_loss: tensor(0.8731, grad_fn=<NllLossBackward>) average train loss tensor(0.9611, grad_fn=<DivBackward0>)
val_acc: 0.47096394407652686 val_avg_loss: tensor(2.4038)
epoch: 31 train_loss: tensor(0.8242, grad_fn=<NllLossBackward>) average train loss tensor(0.9084, grad_fn=<DivBackward0>)
val_acc: 0.4687858719646799 val_avg_loss: tensor(2.4348)
epoch: 32 train_loss: tensor(0.7732, grad_fn=<NllLossBackward>) average train loss tensor(0.8901, grad_fn=<DivBackward0>)
val_acc: 0.4661074319352465 val_avg_loss: tensor(2.4726)
epoch: 33 train_loss: tensor(0.7483, grad_fn=<NllLossBackward>) average train loss tensor(0.8584, grad_fn=<DivBackward0>)
val_acc: 0.4688447387785136 val_avg_loss: tensor(2.4899)
epoch: 34 train_loss: tensor(0.7933, grad_fn=<N

val_acc: 0.4458278145695364 val_avg_loss: tensor(3.9791)
epoch: 75 train_loss: tensor(0.2675, grad_fn=<NllLossBackward>) average train loss tensor(0.2654, grad_fn=<DivBackward0>)
val_acc: 0.4471228844738778 val_avg_loss: tensor(3.9755)
epoch: 76 train_loss: tensor(0.2264, grad_fn=<NllLossBackward>) average train loss tensor(0.2612, grad_fn=<DivBackward0>)
val_acc: 0.4480353200883002 val_avg_loss: tensor(3.9991)
epoch: 77 train_loss: tensor(0.2493, grad_fn=<NllLossBackward>) average train loss tensor(0.2578, grad_fn=<DivBackward0>)
val_acc: 0.4508609271523179 val_avg_loss: tensor(4.0098)
epoch: 78 train_loss: tensor(0.2441, grad_fn=<NllLossBackward>) average train loss tensor(0.2795, grad_fn=<DivBackward0>)
val_acc: 0.44824135393671816 val_avg_loss: tensor(4.0737)
epoch: 79 train_loss: tensor(0.2423, grad_fn=<NllLossBackward>) average train loss tensor(0.2319, grad_fn=<DivBackward0>)
val_acc: 0.44600441501103755 val_avg_loss: tensor(4.1167)
epoch: 80 train_loss: tensor(0.1786, grad_fn=<

val_acc: 0.44426784400294334 val_avg_loss: tensor(5.0849)
epoch: 121 train_loss: tensor(0.1198, grad_fn=<NllLossBackward>) average train loss tensor(0.1294, grad_fn=<DivBackward0>)
val_acc: 0.44379690949227374 val_avg_loss: tensor(5.1226)
epoch: 122 train_loss: tensor(0.1262, grad_fn=<NllLossBackward>) average train loss tensor(0.1285, grad_fn=<DivBackward0>)
val_acc: 0.441383370125092 val_avg_loss: tensor(5.1736)
epoch: 123 train_loss: tensor(0.1231, grad_fn=<NllLossBackward>) average train loss tensor(0.1440, grad_fn=<DivBackward0>)
val_acc: 0.44515084621044887 val_avg_loss: tensor(5.1387)
epoch: 124 train_loss: tensor(0.1083, grad_fn=<NllLossBackward>) average train loss tensor(0.1149, grad_fn=<DivBackward0>)
val_acc: 0.441972038263429 val_avg_loss: tensor(5.1484)
epoch: 125 train_loss: tensor(0.1221, grad_fn=<NllLossBackward>) average train loss tensor(0.1463, grad_fn=<DivBackward0>)
val_acc: 0.4407652685798381 val_avg_loss: tensor(5.1980)
epoch: 126 train_loss: tensor(0.1131, grad

val_acc: 0.4563061074319352 val_avg_loss: tensor(2.2058)
epoch: 27 train_loss: tensor(1.2899, grad_fn=<NllLossBackward>) average train loss tensor(1.3835, grad_fn=<DivBackward0>)
val_acc: 0.4627520235467255 val_avg_loss: tensor(2.1876)
epoch: 28 train_loss: tensor(1.2327, grad_fn=<NllLossBackward>) average train loss tensor(1.2994, grad_fn=<DivBackward0>)
val_acc: 0.4632818248712288 val_avg_loss: tensor(2.1873)
epoch: 29 train_loss: tensor(1.2443, grad_fn=<NllLossBackward>) average train loss tensor(1.2691, grad_fn=<DivBackward0>)
val_acc: 0.46239882266372334 val_avg_loss: tensor(2.2197)
epoch: 30 train_loss: tensor(1.0999, grad_fn=<NllLossBackward>) average train loss tensor(1.1986, grad_fn=<DivBackward0>)
val_acc: 0.4644885945548197 val_avg_loss: tensor(2.2494)
epoch: 31 train_loss: tensor(1.0520, grad_fn=<NllLossBackward>) average train loss tensor(1.1190, grad_fn=<DivBackward0>)
val_acc: 0.46357615894039733 val_avg_loss: tensor(2.3022)
epoch: 32 train_loss: tensor(1.0576, grad_fn=<

val_acc: 0.44032376747608537 val_avg_loss: tensor(3.9628)
epoch: 73 train_loss: tensor(0.2483, grad_fn=<NllLossBackward>) average train loss tensor(0.2539, grad_fn=<DivBackward0>)
val_acc: 0.44211920529801324 val_avg_loss: tensor(3.9786)
epoch: 74 train_loss: tensor(0.2637, grad_fn=<NllLossBackward>) average train loss tensor(0.2623, grad_fn=<DivBackward0>)
val_acc: 0.44235467255334804 val_avg_loss: tensor(4.0135)
epoch: 75 train_loss: tensor(0.2359, grad_fn=<NllLossBackward>) average train loss tensor(0.2202, grad_fn=<DivBackward0>)
val_acc: 0.4412950699043414 val_avg_loss: tensor(4.1018)
epoch: 76 train_loss: tensor(0.2091, grad_fn=<NllLossBackward>) average train loss tensor(0.2468, grad_fn=<DivBackward0>)
val_acc: 0.4397645327446652 val_avg_loss: tensor(4.1985)
epoch: 77 train_loss: tensor(0.2243, grad_fn=<NllLossBackward>) average train loss tensor(0.2261, grad_fn=<DivBackward0>)
val_acc: 0.4392347314201619 val_avg_loss: tensor(4.2250)
epoch: 78 train_loss: tensor(0.2137, grad_fn=

val_acc: 0.43473142016188376 val_avg_loss: tensor(5.0612)
epoch: 119 train_loss: tensor(0.0794, grad_fn=<NllLossBackward>) average train loss tensor(0.0953, grad_fn=<DivBackward0>)
val_acc: 0.4385577630610743 val_avg_loss: tensor(5.0963)
epoch: 120 train_loss: tensor(0.1039, grad_fn=<NllLossBackward>) average train loss tensor(0.0971, grad_fn=<DivBackward0>)
val_acc: 0.43844002943340693 val_avg_loss: tensor(5.1821)
epoch: 121 train_loss: tensor(0.1152, grad_fn=<NllLossBackward>) average train loss tensor(0.1283, grad_fn=<DivBackward0>)
val_acc: 0.4360853568800589 val_avg_loss: tensor(5.2550)
epoch: 122 train_loss: tensor(0.1261, grad_fn=<NllLossBackward>) average train loss tensor(0.1126, grad_fn=<DivBackward0>)
val_acc: 0.4355849889624724 val_avg_loss: tensor(5.2551)
epoch: 123 train_loss: tensor(0.0928, grad_fn=<NllLossBackward>) average train loss tensor(0.1226, grad_fn=<DivBackward0>)
val_acc: 0.43414275202354674 val_avg_loss: tensor(5.1819)
epoch: 124 train_loss: tensor(0.0892, gr

val_acc: 0.4664311994113319 val_avg_loss: tensor(2.0243)
epoch: 25 train_loss: tensor(1.5879, grad_fn=<NllLossBackward>) average train loss tensor(1.7211, grad_fn=<DivBackward0>)
val_acc: 0.4690213392200147 val_avg_loss: tensor(2.0141)
epoch: 26 train_loss: tensor(1.5670, grad_fn=<NllLossBackward>) average train loss tensor(1.6549, grad_fn=<DivBackward0>)
val_acc: 0.47172921265636497 val_avg_loss: tensor(2.0115)
epoch: 27 train_loss: tensor(1.5610, grad_fn=<NllLossBackward>) average train loss tensor(1.6623, grad_fn=<DivBackward0>)
val_acc: 0.4723767476085357 val_avg_loss: tensor(2.0064)
epoch: 28 train_loss: tensor(1.5822, grad_fn=<NllLossBackward>) average train loss tensor(1.6294, grad_fn=<DivBackward0>)
val_acc: 0.4739367181751288 val_avg_loss: tensor(2.0006)
epoch: 29 train_loss: tensor(1.5302, grad_fn=<NllLossBackward>) average train loss tensor(1.5847, grad_fn=<DivBackward0>)
val_acc: 0.4763208241353937 val_avg_loss: tensor(2.0047)
epoch: 30 train_loss: tensor(1.4749, grad_fn=<N

val_acc: 0.48491537895511405 val_avg_loss: tensor(2.3451)
epoch: 71 train_loss: tensor(0.8476, grad_fn=<NllLossBackward>) average train loss tensor(0.8724, grad_fn=<DivBackward0>)
val_acc: 0.4831788079470199 val_avg_loss: tensor(2.3598)
epoch: 72 train_loss: tensor(0.7374, grad_fn=<NllLossBackward>) average train loss tensor(0.8440, grad_fn=<DivBackward0>)
val_acc: 0.4860338484179544 val_avg_loss: tensor(2.3625)
epoch: 73 train_loss: tensor(0.7164, grad_fn=<NllLossBackward>) average train loss tensor(0.7991, grad_fn=<DivBackward0>)
val_acc: 0.48774098601913174 val_avg_loss: tensor(2.3610)
epoch: 74 train_loss: tensor(0.7535, grad_fn=<NllLossBackward>) average train loss tensor(0.8231, grad_fn=<DivBackward0>)
val_acc: 0.4816777041942605 val_avg_loss: tensor(2.3721)
epoch: 75 train_loss: tensor(0.7130, grad_fn=<NllLossBackward>) average train loss tensor(0.8074, grad_fn=<DivBackward0>)
val_acc: 0.4811184694628403 val_avg_loss: tensor(2.3933)
epoch: 76 train_loss: tensor(0.8366, grad_fn=<

val_acc: 0.4814128035320088 val_avg_loss: tensor(2.8208)
epoch: 117 train_loss: tensor(0.4968, grad_fn=<NllLossBackward>) average train loss tensor(0.5549, grad_fn=<DivBackward0>)
val_acc: 0.47926416482707873 val_avg_loss: tensor(2.8226)
epoch: 118 train_loss: tensor(0.4488, grad_fn=<NllLossBackward>) average train loss tensor(0.5107, grad_fn=<DivBackward0>)
val_acc: 0.4787637969094923 val_avg_loss: tensor(2.8227)
epoch: 119 train_loss: tensor(0.4641, grad_fn=<NllLossBackward>) average train loss tensor(0.5116, grad_fn=<DivBackward0>)
val_acc: 0.47793966151582046 val_avg_loss: tensor(2.8507)
epoch: 120 train_loss: tensor(0.4801, grad_fn=<NllLossBackward>) average train loss tensor(0.5295, grad_fn=<DivBackward0>)
val_acc: 0.4780573951434879 val_avg_loss: tensor(2.8720)
epoch: 121 train_loss: tensor(0.5278, grad_fn=<NllLossBackward>) average train loss tensor(0.5273, grad_fn=<DivBackward0>)
val_acc: 0.47970566593083147 val_avg_loss: tensor(2.8708)
epoch: 122 train_loss: tensor(0.5056, gr

KeyboardInterrupt: 

In [71]:
for autoencoder_name in ['trivial', 'complex']:
    cur_autoencoder_full = copy.deepcopy(autoencoder_dict[autoencoder_name])
    cur_encoder_full = cur_autoencoder_full.encoder
        
    cur_after_encoder_model_full = AfterEncoderModel(cur_encoder_full, d=128, drop=0.5)
    cur_optimizer_aem_full = torch.optim.Adam(cur_after_encoder_model_full.parameters(), lr=1e-3)
        
    writer = SummaryWriter('runs/aem_full' + autoencoder_name + '_' + str(rs) + '_bs2048_rs42_d128_wd0005_drop05_100')

    torch_models.fit_topics_model(
        model=cur_after_encoder_model_full,
        optimizer=cur_optimizer_aem_full,
        epochs=100,
        writer=writer,
        train_loader=train_loader,
        val_loader=val_loader
    )  

epoch: 0 train_loss: tensor(2.2325, grad_fn=<NllLossBackward>) average train loss tensor(2.9030, grad_fn=<DivBackward0>)
val_acc: 0.49792494481236205 val_avg_loss: tensor(1.8804)
epoch: 1 train_loss: tensor(1.9070, grad_fn=<NllLossBackward>) average train loss tensor(2.0523, grad_fn=<DivBackward0>)
val_acc: 0.5554966887417219 val_avg_loss: tensor(1.6535)
epoch: 2 train_loss: tensor(1.8138, grad_fn=<NllLossBackward>) average train loss tensor(1.9079, grad_fn=<DivBackward0>)
val_acc: 0.576953642384106 val_avg_loss: tensor(1.5839)
epoch: 3 train_loss: tensor(1.7867, grad_fn=<NllLossBackward>) average train loss tensor(1.8438, grad_fn=<DivBackward0>)
val_acc: 0.5840765268579838 val_avg_loss: tensor(1.5450)
epoch: 4 train_loss: tensor(1.7154, grad_fn=<NllLossBackward>) average train loss tensor(1.7971, grad_fn=<DivBackward0>)
val_acc: 0.5900515084621045 val_avg_loss: tensor(1.5182)
epoch: 5 train_loss: tensor(1.7385, grad_fn=<NllLossBackward>) average train loss tensor(1.7662, grad_fn=<DivB

epoch: 46 train_loss: tensor(1.3499, grad_fn=<NllLossBackward>) average train loss tensor(1.4947, grad_fn=<DivBackward0>)
val_acc: 0.6246946284032376 val_avg_loss: tensor(1.3648)
epoch: 47 train_loss: tensor(1.3661, grad_fn=<NllLossBackward>) average train loss tensor(1.4951, grad_fn=<DivBackward0>)
val_acc: 0.6245180279617366 val_avg_loss: tensor(1.3644)
epoch: 48 train_loss: tensor(1.2814, grad_fn=<NllLossBackward>) average train loss tensor(1.4886, grad_fn=<DivBackward0>)
val_acc: 0.6254598969830758 val_avg_loss: tensor(1.3607)
epoch: 49 train_loss: tensor(1.3180, grad_fn=<NllLossBackward>) average train loss tensor(1.4828, grad_fn=<DivBackward0>)
val_acc: 0.6242825607064018 val_avg_loss: tensor(1.3627)
epoch: 50 train_loss: tensor(1.2930, grad_fn=<NllLossBackward>) average train loss tensor(1.4815, grad_fn=<DivBackward0>)
val_acc: 0.6253715967623252 val_avg_loss: tensor(1.3602)
epoch: 51 train_loss: tensor(1.3329, grad_fn=<NllLossBackward>) average train loss tensor(1.4764, grad_fn

epoch: 92 train_loss: tensor(1.1536, grad_fn=<NllLossBackward>) average train loss tensor(1.4025, grad_fn=<DivBackward0>)
val_acc: 0.6260779985283297 val_avg_loss: tensor(1.3488)
epoch: 93 train_loss: tensor(1.1414, grad_fn=<NllLossBackward>) average train loss tensor(1.4001, grad_fn=<DivBackward0>)
val_acc: 0.6272259013980869 val_avg_loss: tensor(1.3490)
epoch: 94 train_loss: tensor(1.1609, grad_fn=<NllLossBackward>) average train loss tensor(1.3953, grad_fn=<DivBackward0>)
val_acc: 0.6270198675496689 val_avg_loss: tensor(1.3478)
epoch: 95 train_loss: tensor(1.1853, grad_fn=<NllLossBackward>) average train loss tensor(1.3988, grad_fn=<DivBackward0>)
val_acc: 0.626990434142752 val_avg_loss: tensor(1.3486)
epoch: 96 train_loss: tensor(1.1346, grad_fn=<NllLossBackward>) average train loss tensor(1.3948, grad_fn=<DivBackward0>)
val_acc: 0.6258719646799117 val_avg_loss: tensor(1.3488)
epoch: 97 train_loss: tensor(1.1848, grad_fn=<NllLossBackward>) average train loss tensor(1.3954, grad_fn=

epoch: 38 train_loss: tensor(1.3165, grad_fn=<NllLossBackward>) average train loss tensor(1.5155, grad_fn=<DivBackward0>)
val_acc: 0.6213686534216335 val_avg_loss: tensor(1.3751)
epoch: 39 train_loss: tensor(1.3419, grad_fn=<NllLossBackward>) average train loss tensor(1.5127, grad_fn=<DivBackward0>)
val_acc: 0.6223988226637234 val_avg_loss: tensor(1.3718)
epoch: 40 train_loss: tensor(1.3924, grad_fn=<NllLossBackward>) average train loss tensor(1.5126, grad_fn=<DivBackward0>)
val_acc: 0.6228108903605593 val_avg_loss: tensor(1.3697)
epoch: 41 train_loss: tensor(1.3734, grad_fn=<NllLossBackward>) average train loss tensor(1.5064, grad_fn=<DivBackward0>)
val_acc: 0.6221044885945548 val_avg_loss: tensor(1.3712)
epoch: 42 train_loss: tensor(1.3605, grad_fn=<NllLossBackward>) average train loss tensor(1.5035, grad_fn=<DivBackward0>)
val_acc: 0.6239293598233996 val_avg_loss: tensor(1.3697)
epoch: 43 train_loss: tensor(1.3585, grad_fn=<NllLossBackward>) average train loss tensor(1.5027, grad_fn

epoch: 84 train_loss: tensor(1.1849, grad_fn=<NllLossBackward>) average train loss tensor(1.4076, grad_fn=<DivBackward0>)
val_acc: 0.6261662987490803 val_avg_loss: tensor(1.3494)
epoch: 85 train_loss: tensor(1.1882, grad_fn=<NllLossBackward>) average train loss tensor(1.4069, grad_fn=<DivBackward0>)
val_acc: 0.6265783664459161 val_avg_loss: tensor(1.3517)
epoch: 86 train_loss: tensor(1.1839, grad_fn=<NllLossBackward>) average train loss tensor(1.4104, grad_fn=<DivBackward0>)
val_acc: 0.6264311994113319 val_avg_loss: tensor(1.3508)
epoch: 87 train_loss: tensor(1.2100, grad_fn=<NllLossBackward>) average train loss tensor(1.4104, grad_fn=<DivBackward0>)
val_acc: 0.6257542310522443 val_avg_loss: tensor(1.3501)
epoch: 88 train_loss: tensor(1.1927, grad_fn=<NllLossBackward>) average train loss tensor(1.4059, grad_fn=<DivBackward0>)
val_acc: 0.6256953642384105 val_avg_loss: tensor(1.3491)
epoch: 89 train_loss: tensor(1.1571, grad_fn=<NllLossBackward>) average train loss tensor(1.4029, grad_fn

### wider autoencoder (maximum similarity to norm model)

In [73]:
autoencoder_c_256 = AutoencoderComplex(d=256)
optimizer_c_256 = torch.optim.Adam(autoencoder_c_256.parameters(), lr=1e-3)

stat = fit_autoencoder(autoencoder_c_256, optimizer_c_256, 100, [x_img_train, x_txt_train], [x_img_val, x_txt_val])


train img loss: 0.567 txt_loss: 0.618 img + txt loss 1.185
val img loss: 0.343 val txt_loss: 0.409 img + txt loss 0.752
train img loss: 0.267 txt_loss: 0.326 img + txt loss 0.592
val img loss: 0.214 val txt_loss: 0.264 img + txt loss 0.479
train img loss: 0.187 txt_loss: 0.229 img + txt loss 0.416
val img loss: 0.167 val txt_loss: 0.201 img + txt loss 0.368
train img loss: 0.153 txt_loss: 0.181 img + txt loss 0.334
val img loss: 0.143 val txt_loss: 0.165 img + txt loss 0.308
train img loss: 0.134 txt_loss: 0.151 img + txt loss 0.285
val img loss: 0.128 val txt_loss: 0.141 img + txt loss 0.268
train img loss: 0.121 txt_loss: 0.131 img + txt loss 0.252
val img loss: 0.117 val txt_loss: 0.124 img + txt loss 0.241
train img loss: 0.111 txt_loss: 0.116 img + txt loss 0.228
val img loss: 0.109 val txt_loss: 0.111 img + txt loss 0.219
train img loss: 0.104 txt_loss: 0.104 img + txt loss 0.208
val img loss: 0.102 val txt_loss: 0.099 img + txt loss 0.201
train img loss: 0.098 txt_loss: 0.093 im

val img loss: 0.072 val txt_loss: 0.041 img + txt loss 0.114
train img loss: 0.072 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.072 val txt_loss: 0.041 img + txt loss 0.114
train img loss: 0.071 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.073 val txt_loss: 0.041 img + txt loss 0.114
train img loss: 0.072 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.072 val txt_loss: 0.041 img + txt loss 0.113
train img loss: 0.072 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.073 val txt_loss: 0.041 img + txt loss 0.114
train img loss: 0.071 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.072 val txt_loss: 0.041 img + txt loss 0.113
train img loss: 0.072 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.072 val txt_loss: 0.041 img + txt loss 0.114
train img loss: 0.071 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.072 val txt_loss: 0.041 img + txt loss 0.113
train img loss: 0.072 txt_loss: 0.040 img + txt loss 0.112
val img loss: 0.073 val txt_loss: 0.041 

In [74]:
class AfterEncoderModel2(nn.Module):
    def __init__(self, encoder, d=128, drop=0.5):
        super().__init__()
        
        self.encoder = encoder
            
        self.fc1 = nn.Linear(d * 4, d)
        self.fc2 = nn.Linear(d, d)
        self.out = nn.Linear(d, N_CLASSES)

        self.dropout = nn.modules.Dropout(p=drop)

    def forward(self, inp_img, inp_txt):
        x = self.encoder(inp_img, inp_txt)
        x = self.dropout(x)
        
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        
        x = F.relu(self.fc2(x))
        x = F.log_softmax(self.out(x), dim=1)
        return x

In [79]:
class AfterEncoderModel2BN(nn.Module):
    def __init__(self, encoder, d=128, drop=0.5):
        super().__init__()
        
        self.encoder = encoder
        self.bn0 = nn.BatchNorm1d(num_features=d * 4)
        self.fc1 = nn.Linear(d * 4, d)
        self.bn1 = nn.BatchNorm1d(num_features=d)
        self.fc2 = nn.Linear(d, d)
        self.bn2 = nn.BatchNorm1d(num_features=d)
        self.out = nn.Linear(d, N_CLASSES)

        self.dropout = nn.modules.Dropout(p=drop)

    def forward(self, inp_img, inp_txt):
        x = self.encoder(inp_img, inp_txt)
        x = self.dropout(self.bn0(x))
        
        x = F.relu(self.fc1(x))
        x = self.dropout(self.bn1(x))
        
        x = F.relu(self.fc2(x))
        x = self.bn2(x)
        x = F.log_softmax(self.out(x), dim=1)
        return x

In [75]:
encoder_c_256 = copy.deepcopy(autoencoder_c_256.encoder)
        
after_encoder_model_c_256 = AfterEncoderModel2(encoder_c_256, d=128, drop=0.5)
optimizer_aem_c_256 = torch.optim.Adam(after_encoder_model_c_256.parameters(), lr=1e-3)
        
writer = SummaryWriter('runs/aem2_full_c_256_bs2048_rs42_d128_drop05_100')

torch_models.fit_topics_model(
    model=after_encoder_model_c_256,
    optimizer=optimizer_aem_c_256,
    epochs=100,
    writer=writer,
    train_loader=train_loader,
    val_loader=val_loader
)  

epoch: 0 train_loss: tensor(2.1380, grad_fn=<NllLossBackward>) average train loss tensor(2.7222, grad_fn=<DivBackward0>)
val_acc: 0.5221486387049301 val_avg_loss: tensor(1.7722)
epoch: 1 train_loss: tensor(1.8335, grad_fn=<NllLossBackward>) average train loss tensor(1.9462, grad_fn=<DivBackward0>)
val_acc: 0.5705960264900662 val_avg_loss: tensor(1.5953)
epoch: 2 train_loss: tensor(1.8083, grad_fn=<NllLossBackward>) average train loss tensor(1.8222, grad_fn=<DivBackward0>)
val_acc: 0.5823105224429728 val_avg_loss: tensor(1.5435)
epoch: 3 train_loss: tensor(1.7105, grad_fn=<NllLossBackward>) average train loss tensor(1.7596, grad_fn=<DivBackward0>)
val_acc: 0.5935540838852097 val_avg_loss: tensor(1.5034)
epoch: 4 train_loss: tensor(1.6634, grad_fn=<NllLossBackward>) average train loss tensor(1.7258, grad_fn=<DivBackward0>)
val_acc: 0.5985871964679912 val_avg_loss: tensor(1.4822)
epoch: 5 train_loss: tensor(1.6554, grad_fn=<NllLossBackward>) average train loss tensor(1.6953, grad_fn=<DivB

epoch: 46 train_loss: tensor(1.2151, grad_fn=<NllLossBackward>) average train loss tensor(1.4092, grad_fn=<DivBackward0>)
val_acc: 0.6290802060338484 val_avg_loss: tensor(1.3362)
epoch: 47 train_loss: tensor(1.2394, grad_fn=<NllLossBackward>) average train loss tensor(1.4048, grad_fn=<DivBackward0>)
val_acc: 0.6287270051508462 val_avg_loss: tensor(1.3368)
epoch: 48 train_loss: tensor(1.2155, grad_fn=<NllLossBackward>) average train loss tensor(1.4059, grad_fn=<DivBackward0>)
val_acc: 0.6289036055923473 val_avg_loss: tensor(1.3374)
epoch: 49 train_loss: tensor(1.1734, grad_fn=<NllLossBackward>) average train loss tensor(1.4003, grad_fn=<DivBackward0>)
val_acc: 0.6292273730684327 val_avg_loss: tensor(1.3355)
epoch: 50 train_loss: tensor(1.1954, grad_fn=<NllLossBackward>) average train loss tensor(1.3992, grad_fn=<DivBackward0>)
val_acc: 0.630522442972774 val_avg_loss: tensor(1.3357)
epoch: 51 train_loss: tensor(1.1943, grad_fn=<NllLossBackward>) average train loss tensor(1.3943, grad_fn=

epoch: 92 train_loss: tensor(1.0466, grad_fn=<NllLossBackward>) average train loss tensor(1.2969, grad_fn=<DivBackward0>)
val_acc: 0.6310228108903606 val_avg_loss: tensor(1.3246)
epoch: 93 train_loss: tensor(1.0175, grad_fn=<NllLossBackward>) average train loss tensor(1.2926, grad_fn=<DivBackward0>)
val_acc: 0.63252391464312 val_avg_loss: tensor(1.3221)
epoch: 94 train_loss: tensor(0.9986, grad_fn=<NllLossBackward>) average train loss tensor(1.2922, grad_fn=<DivBackward0>)
val_acc: 0.6324356144223694 val_avg_loss: tensor(1.3249)
epoch: 95 train_loss: tensor(1.0250, grad_fn=<NllLossBackward>) average train loss tensor(1.2933, grad_fn=<DivBackward0>)
val_acc: 0.6324944812362031 val_avg_loss: tensor(1.3234)
epoch: 96 train_loss: tensor(0.9667, grad_fn=<NllLossBackward>) average train loss tensor(1.2851, grad_fn=<DivBackward0>)
val_acc: 0.6322001471670345 val_avg_loss: tensor(1.3251)
epoch: 97 train_loss: tensor(0.9618, grad_fn=<NllLossBackward>) average train loss tensor(1.2878, grad_fn=<

In [76]:
encoder_c_256_wd = copy.deepcopy(autoencoder_c_256.encoder)
        
after_encoder_model_c_256_wd = AfterEncoderModel2(encoder_c_256_wd, d=128, drop=0.5)
optimizer_aem_c_256_wd = torch.optim.Adam(after_encoder_model_c_256_wd.parameters(), lr=1e-3, weight_decay=0.0005)
        
writer = SummaryWriter('runs/aem2_full_c_256_bs2048_rs42_d128_drop05_wd0005_100')

torch_models.fit_topics_model(
    model=after_encoder_model_c_256_wd,
    optimizer=optimizer_aem_c_256_wd,
    epochs=100,
    writer=writer,
    train_loader=train_loader,
    val_loader=val_loader
) 

epoch: 0 train_loss: tensor(2.0852, grad_fn=<NllLossBackward>) average train loss tensor(2.7593, grad_fn=<DivBackward0>)
val_acc: 0.5146431199411332 val_avg_loss: tensor(1.8053)
epoch: 1 train_loss: tensor(1.8677, grad_fn=<NllLossBackward>) average train loss tensor(1.9506, grad_fn=<DivBackward0>)
val_acc: 0.5687122884473877 val_avg_loss: tensor(1.5950)
epoch: 2 train_loss: tensor(1.7715, grad_fn=<NllLossBackward>) average train loss tensor(1.8139, grad_fn=<DivBackward0>)
val_acc: 0.5848417954378219 val_avg_loss: tensor(1.5280)
epoch: 3 train_loss: tensor(1.6716, grad_fn=<NllLossBackward>) average train loss tensor(1.7510, grad_fn=<DivBackward0>)
val_acc: 0.5928771155261221 val_avg_loss: tensor(1.4999)
epoch: 4 train_loss: tensor(1.6499, grad_fn=<NllLossBackward>) average train loss tensor(1.7125, grad_fn=<DivBackward0>)
val_acc: 0.5967328918322296 val_avg_loss: tensor(1.4800)
epoch: 5 train_loss: tensor(1.5739, grad_fn=<NllLossBackward>) average train loss tensor(1.6850, grad_fn=<DivB

epoch: 46 train_loss: tensor(1.2398, grad_fn=<NllLossBackward>) average train loss tensor(1.4201, grad_fn=<DivBackward0>)
val_acc: 0.6280500367917586 val_avg_loss: tensor(1.3424)
epoch: 47 train_loss: tensor(1.2275, grad_fn=<NllLossBackward>) average train loss tensor(1.4178, grad_fn=<DivBackward0>)
val_acc: 0.627785136129507 val_avg_loss: tensor(1.3407)
epoch: 48 train_loss: tensor(1.2646, grad_fn=<NllLossBackward>) average train loss tensor(1.4186, grad_fn=<DivBackward0>)
val_acc: 0.6278734363502575 val_avg_loss: tensor(1.3433)
epoch: 49 train_loss: tensor(1.2030, grad_fn=<NllLossBackward>) average train loss tensor(1.4146, grad_fn=<DivBackward0>)
val_acc: 0.6282266372332598 val_avg_loss: tensor(1.3435)
epoch: 50 train_loss: tensor(1.1889, grad_fn=<NllLossBackward>) average train loss tensor(1.4130, grad_fn=<DivBackward0>)
val_acc: 0.6271376011773363 val_avg_loss: tensor(1.3424)
epoch: 51 train_loss: tensor(1.1937, grad_fn=<NllLossBackward>) average train loss tensor(1.4114, grad_fn=

epoch: 92 train_loss: tensor(1.1189, grad_fn=<NllLossBackward>) average train loss tensor(1.3611, grad_fn=<DivBackward0>)
val_acc: 0.6318763796909492 val_avg_loss: tensor(1.3310)
epoch: 93 train_loss: tensor(1.0899, grad_fn=<NllLossBackward>) average train loss tensor(1.3613, grad_fn=<DivBackward0>)
val_acc: 0.6324061810154525 val_avg_loss: tensor(1.3270)
epoch: 94 train_loss: tensor(1.1107, grad_fn=<NllLossBackward>) average train loss tensor(1.3595, grad_fn=<DivBackward0>)
val_acc: 0.6316409124356144 val_avg_loss: tensor(1.3303)
epoch: 95 train_loss: tensor(1.1115, grad_fn=<NllLossBackward>) average train loss tensor(1.3561, grad_fn=<DivBackward0>)
val_acc: 0.6312877115526122 val_avg_loss: tensor(1.3295)
epoch: 96 train_loss: tensor(1.1393, grad_fn=<NllLossBackward>) average train loss tensor(1.3543, grad_fn=<DivBackward0>)
val_acc: 0.6320529801324504 val_avg_loss: tensor(1.3286)
epoch: 97 train_loss: tensor(1.0952, grad_fn=<NllLossBackward>) average train loss tensor(1.3577, grad_fn

In [77]:
encoder_c_256_wd_2000 = copy.deepcopy(autoencoder_c_256.encoder)
        
after_encoder_model_c_256_wd_2000 = AfterEncoderModel2(encoder_c_256_wd_2000, d=128, drop=0.5)
optimizer_aem_c_256_wd_2000 = torch.optim.Adam(after_encoder_model_c_256_wd_2000.parameters(), lr=1e-3, weight_decay=0.0005)
        
writer = SummaryWriter('runs/aem2_2000_c_256_bs2048_rs42_d128_drop05_wd0005_100')

torch_models.fit_topics_model(
    model=after_encoder_model_c_256_wd_2000,
    optimizer=optimizer_aem_c_256_wd_2000,
    epochs=150,
    writer=writer,
    train_loader=train_loader_2000,
    val_loader=val_loader
) 

epoch: 0 train_loss: tensor(3.8078, grad_fn=<NllLossBackward>) average train loss tensor(3.8810, grad_fn=<DivBackward0>)
val_acc: 0.08688741721854305 val_avg_loss: tensor(3.7525)
epoch: 1 train_loss: tensor(3.6504, grad_fn=<NllLossBackward>) average train loss tensor(3.6963, grad_fn=<DivBackward0>)
val_acc: 0.13200883002207506 val_avg_loss: tensor(3.5889)
epoch: 2 train_loss: tensor(3.4808, grad_fn=<NllLossBackward>) average train loss tensor(3.5614, grad_fn=<DivBackward0>)
val_acc: 0.1746578366445916 val_avg_loss: tensor(3.4441)
epoch: 3 train_loss: tensor(3.3523, grad_fn=<NllLossBackward>) average train loss tensor(3.4017, grad_fn=<DivBackward0>)
val_acc: 0.2427373068432671 val_avg_loss: tensor(3.2909)
epoch: 4 train_loss: tensor(3.1856, grad_fn=<NllLossBackward>) average train loss tensor(3.2596, grad_fn=<DivBackward0>)
val_acc: 0.25901398086828553 val_avg_loss: tensor(3.1374)
epoch: 5 train_loss: tensor(3.0573, grad_fn=<NllLossBackward>) average train loss tensor(3.1272, grad_fn=<D

epoch: 46 train_loss: tensor(0.7724, grad_fn=<NllLossBackward>) average train loss tensor(0.8505, grad_fn=<DivBackward0>)
val_acc: 0.4861810154525386 val_avg_loss: tensor(2.2290)
epoch: 47 train_loss: tensor(0.8126, grad_fn=<NllLossBackward>) average train loss tensor(0.8611, grad_fn=<DivBackward0>)
val_acc: 0.48538631346578365 val_avg_loss: tensor(2.2224)
epoch: 48 train_loss: tensor(0.7241, grad_fn=<NllLossBackward>) average train loss tensor(0.8139, grad_fn=<DivBackward0>)
val_acc: 0.483944076526858 val_avg_loss: tensor(2.2390)
epoch: 49 train_loss: tensor(0.8217, grad_fn=<NllLossBackward>) average train loss tensor(0.8322, grad_fn=<DivBackward0>)
val_acc: 0.48515084621044885 val_avg_loss: tensor(2.2730)
epoch: 50 train_loss: tensor(0.7340, grad_fn=<NllLossBackward>) average train loss tensor(0.7984, grad_fn=<DivBackward0>)
val_acc: 0.4840323767476085 val_avg_loss: tensor(2.2836)
epoch: 51 train_loss: tensor(0.6794, grad_fn=<NllLossBackward>) average train loss tensor(0.7841, grad_f

val_acc: 0.4772038263428992 val_avg_loss: tensor(2.8618)
epoch: 93 train_loss: tensor(0.3567, grad_fn=<NllLossBackward>) average train loss tensor(0.3959, grad_fn=<DivBackward0>)
val_acc: 0.47764532744665195 val_avg_loss: tensor(2.8741)
epoch: 94 train_loss: tensor(0.3177, grad_fn=<NllLossBackward>) average train loss tensor(0.3947, grad_fn=<DivBackward0>)
val_acc: 0.4768211920529801 val_avg_loss: tensor(2.8841)
epoch: 95 train_loss: tensor(0.3443, grad_fn=<NllLossBackward>) average train loss tensor(0.3900, grad_fn=<DivBackward0>)
val_acc: 0.476055923473142 val_avg_loss: tensor(2.9000)
epoch: 96 train_loss: tensor(0.3286, grad_fn=<NllLossBackward>) average train loss tensor(0.3766, grad_fn=<DivBackward0>)
val_acc: 0.4770566593083149 val_avg_loss: tensor(2.9173)
epoch: 97 train_loss: tensor(0.3123, grad_fn=<NllLossBackward>) average train loss tensor(0.3780, grad_fn=<DivBackward0>)
val_acc: 0.47764532744665195 val_avg_loss: tensor(2.9187)
epoch: 98 train_loss: tensor(0.2831, grad_fn=<N

val_acc: 0.47220014716703457 val_avg_loss: tensor(3.3470)
epoch: 139 train_loss: tensor(0.2061, grad_fn=<NllLossBackward>) average train loss tensor(0.2680, grad_fn=<DivBackward0>)
val_acc: 0.473701250919794 val_avg_loss: tensor(3.3273)
epoch: 140 train_loss: tensor(0.2232, grad_fn=<NllLossBackward>) average train loss tensor(0.2372, grad_fn=<DivBackward0>)
val_acc: 0.47220014716703457 val_avg_loss: tensor(3.3453)
epoch: 141 train_loss: tensor(0.1922, grad_fn=<NllLossBackward>) average train loss tensor(0.2106, grad_fn=<DivBackward0>)
val_acc: 0.4714643119941133 val_avg_loss: tensor(3.3346)
epoch: 142 train_loss: tensor(0.2550, grad_fn=<NllLossBackward>) average train loss tensor(0.2474, grad_fn=<DivBackward0>)
val_acc: 0.47299484915378953 val_avg_loss: tensor(3.3458)
epoch: 143 train_loss: tensor(0.1932, grad_fn=<NllLossBackward>) average train loss tensor(0.2217, grad_fn=<DivBackward0>)
val_acc: 0.4749963208241354 val_avg_loss: tensor(3.3816)
epoch: 144 train_loss: tensor(0.2067, gra

In [None]:
encoder_c_256_wd_2000 = copy.deepcopy(autoencoder_c_256.encoder)
        
after_encoder_model_c_256_wd_2000 = AfterEncoderModel2BN(encoder_c_256_wd_2000, d=128, drop=0.5)
optimizer_aem_c_256_wd_2000 = torch.optim.Adam(after_encoder_model_c_256_wd_2000.parameters(), lr=1e-3, weight_decay=0.0005)
        
writer = SummaryWriter('runs/aem2bn_2000_c_256_bs2048_rs42_d128_drop05_wd0005_100')

torch_models.fit_topics_model(
    model=after_encoder_model_c_256_wd_2000,
    optimizer=optimizer_aem_c_256_wd_2000,
    epochs=80,
    writer=writer,
    train_loader=train_loader_2000,
    val_loader=val_loader
) 

epoch: 0 train_loss: tensor(3.8102, grad_fn=<NllLossBackward>) average train loss tensor(3.9508, grad_fn=<DivBackward0>)
val_acc: 0.060456217807211186 val_avg_loss: tensor(3.8479)
epoch: 1 train_loss: tensor(3.4707, grad_fn=<NllLossBackward>) average train loss tensor(3.5518, grad_fn=<DivBackward0>)
val_acc: 0.21816041206769685 val_avg_loss: tensor(3.6711)
epoch: 2 train_loss: tensor(3.1964, grad_fn=<NllLossBackward>) average train loss tensor(3.2637, grad_fn=<DivBackward0>)
val_acc: 0.31620309050772627 val_avg_loss: tensor(3.4199)
epoch: 3 train_loss: tensor(2.9629, grad_fn=<NllLossBackward>) average train loss tensor(3.0200, grad_fn=<DivBackward0>)
val_acc: 0.3619131714495953 val_avg_loss: tensor(3.1346)
epoch: 4 train_loss: tensor(2.7414, grad_fn=<NllLossBackward>) average train loss tensor(2.8060, grad_fn=<DivBackward0>)
val_acc: 0.38631346578366443 val_avg_loss: tensor(2.8746)
epoch: 5 train_loss: tensor(2.5161, grad_fn=<NllLossBackward>) average train loss tensor(2.6092, grad_fn=

## Autoencoder similar to beggining of actual model

In [24]:
class EncoderComplex(nn.Module):
    def __init__(self, d, drop=0.5):
        super().__init__()
        self.fc_img_1 = nn.Linear(IMG_LEN, d * 4)
        self.fc_img_2 = nn.Linear(d * 4, d * 2)
        
        self.fc_txt_1 = nn.Linear(TXT_LEN, d * 2)
        self.fc_txt_2 = nn.Linear(d * 2, d * 2)
        
        self.dropout = nn.modules.Dropout(p=drop)
        
    def forward(self, inp_img, inp_txt):
        x_img = self.dropout(F.relu(self.fc_img_1(inp_img)))
        x_img = self.dropout(F.relu(self.fc_img_2(inp_img)))
        
        x_txt = self.dropout(F.relu(self.fc_txt_1(inp_txt)))
        x_txt = self.dropout(F.relu(self.fc_txt_2(inp_txt)))
        
        x = F.relu(torch.cat((x_img, x_txt), 1))
        return x
    
class DecoderComplex(nn.Module):
    def __init__(self, d, drop=0.5):
        super().__init__()
        self.fc_img_1 = nn.Linear(2 * d, 4 * d)
        self.fc_img_2 = nn.Linear(4 * d, IMG_LEN)
        
        self.fc_txt_1 = nn.Linear(2 * d, 2 * d)
        self.fc_txt_2 = nn.Linear(2 * d, TXT_LEN)
        
        self.dropout = nn.modules.Dropout(p=drop)
    
    def forward(self, x):
        x_img = self.dropout(F.relu(self.fc_img_1(x)))
        x_img = self.fc_img_2(x)
        
        x_txt = self.dropout(F.relu(self.fc_txt_1(x)))
        x_txt = self.fc_txt_2(x)
        
        return x_img, x_txt

class AutoencoderComplex(nn.Module):
    def __init__(self, d):
        super().__init__()
        self.encoder = Encoder(d)
        self.decoder = Decoder(d)
        
    def forward(self, inp_img, inp_txt):
        x = self.encoder(inp_img, inp_txt)
        x_img, x_txt = self.decoder(x)
        return x_img, x_txt

In [25]:
autoencoder_c = AutoencoderComplex(d=128)
optimizer_c = torch.optim.Adam(autoencoder_c.parameters(), lr=1e-3)

stat = fit_autoencoder(autoencoder_c, optimizer_c, 100, [x_img_train, x_txt_train], [x_img_val, x_txt_val])
pickle.dump(stat, open( "autoencoder_c128_stat/trivial_" + str(d) + ".pickle", "wb"))

train img loss: 0.695 txt_loss: 0.717 img + txt loss 1.411
val img loss: 0.125 val txt_loss: 0.136 img + txt loss 0.261
train img loss: 0.407 txt_loss: 0.463 img + txt loss 0.870
val img loss: 0.086 val txt_loss: 0.103 img + txt loss 0.189
train img loss: 0.302 txt_loss: 0.376 img + txt loss 0.678
val img loss: 0.069 val txt_loss: 0.089 img + txt loss 0.159
train img loss: 0.254 txt_loss: 0.338 img + txt loss 0.592
val img loss: 0.061 val txt_loss: 0.083 img + txt loss 0.143
train img loss: 0.229 txt_loss: 0.317 img + txt loss 0.546
val img loss: 0.056 val txt_loss: 0.078 img + txt loss 0.134
train img loss: 0.213 txt_loss: 0.302 img + txt loss 0.516
val img loss: 0.053 val txt_loss: 0.075 img + txt loss 0.128
train img loss: 0.203 txt_loss: 0.290 img + txt loss 0.493
val img loss: 0.051 val txt_loss: 0.072 img + txt loss 0.123
train img loss: 0.195 txt_loss: 0.279 img + txt loss 0.475
val img loss: 0.049 val txt_loss: 0.070 img + txt loss 0.119
train img loss: 0.189 txt_loss: 0.269 im

val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.042 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.042 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 img + txt loss 0.099
train img loss: 0.163 txt_loss: 0.225 img + txt loss 0.388
val img loss: 0.041 val txt_loss: 0.057 

NameError: name 'd' is not defined

In [33]:
encoder_c_2 = autoencoder_c.encoder
after_encoder_model_c_2 = AfterEncoderModel(encoder_c_2, d=128, drop=0.5)
optimizer_aem_c_2 = torch.optim.Adam(after_encoder_model_c_2.parameters(), lr=1e-3)
writer = SummaryWriter('runs/aem_c_2_bs2048_rs42_d128_wd0005_drop05_100')

torch_models.fit_topics_model(
    model=after_encoder_model_c_2,
    optimizer=optimizer_aem_c_2,
    epochs=100,
    writer=writer,
    train_loader=train_loader_2000,
    val_loader=val_loader
)

epoch: 0 train_loss: tensor(3.8203, grad_fn=<NllLossBackward>) average train loss tensor(3.8808, grad_fn=<DivBackward0>)
val_acc: 0.07929359823399558 val_avg_loss: tensor(3.7910)
epoch: 1 train_loss: tensor(3.6845, grad_fn=<NllLossBackward>) average train loss tensor(3.7341, grad_fn=<DivBackward0>)
val_acc: 0.0866813833701251 val_avg_loss: tensor(3.6537)
epoch: 2 train_loss: tensor(3.5847, grad_fn=<NllLossBackward>) average train loss tensor(3.6208, grad_fn=<DivBackward0>)
val_acc: 0.11935246504782929 val_avg_loss: tensor(3.5340)
epoch: 3 train_loss: tensor(3.4435, grad_fn=<NllLossBackward>) average train loss tensor(3.5119, grad_fn=<DivBackward0>)
val_acc: 0.18949227373068434 val_avg_loss: tensor(3.4188)
epoch: 4 train_loss: tensor(3.3281, grad_fn=<NllLossBackward>) average train loss tensor(3.3978, grad_fn=<DivBackward0>)
val_acc: 0.22740250183958793 val_avg_loss: tensor(3.2813)
epoch: 5 train_loss: tensor(3.2015, grad_fn=<NllLossBackward>) average train loss tensor(3.2824, grad_fn=<

KeyboardInterrupt: 