In [1]:
import json

import optuna

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import SubsetRandomSampler, DataLoader
from torchvision import transforms as T

import numpy as np

%cd ../
from src.data.datasets import HandwritingDataset
from src.models.models import HandwritingClassifier
%cd notebooks/

%matplotlib inline
%load_ext autoreload
%autoreload 2

/home/nazar/Projects/ukrainian_handwriting
/home/nazar/Projects/ukrainian_handwriting/notebooks


In [3]:
device = torch.device('cuda:0')

In [4]:
MEAN = HandwritingClassifier._mean
STD = HandwritingClassifier._std

In [5]:
tf = T.Compose([
    T.RandomRotation(30),
    T.RandomAffine(0, (0.1, 0.1)),
    T.ToTensor(),
    T.Normalize(mean=MEAN, std=STD)
])

In [6]:
train_data = HandwritingDataset(
    '../data/processed/train_data.csv',
    transforms=tf
)

test_data = HandwritingDataset(
    '../data/processed/test_data.csv',
    transforms=T.Compose([
        T.ToTensor(),
        T.Normalize(mean=MEAN, std=STD)
    ])
)

print('Number of samples in training data:', len(train_data))
print('Number of samples in test data:', len(test_data))

Number of samples in training data: 1281
Number of samples in test data: 300


In [7]:
BATCH_SIZE = 64
VAL_SIZE = 100

indices = list(range(len(train_data)))
np.random.seed(42)
np.random.shuffle(indices)
train_indices, val_indices = indices[VAL_SIZE:], indices[:VAL_SIZE]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(train_data, BATCH_SIZE, sampler=train_sampler)
val_loader = DataLoader(train_data, sampler=val_sampler)
test_loader = DataLoader(test_data)

In [13]:
def build_model(params: dict):
    model = HandwritingClassifier()
    model.load_state_dict(torch.load('../models/mnist_model.pt'), strict=False)
    
    model.type(torch.cuda.FloatTensor)
    model.to(device)
    
    criterion_1 = nn.CrossEntropyLoss().type(torch.cuda.FloatTensor)
    criterion_2 = nn.BCEWithLogitsLoss().type(torch.cuda.FloatTensor)
    losses = (criterion_1, criterion_2)
    
    LR = params['learning_rate']
    REG = params['weight_decay']
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=REG)
    
    factor = params['factor']
    patience = params['patience']
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=factor,
                                                     patience=patience)
    return model, losses, optimizer, scheduler


def compute_accuracy(prediction, ground_truth):
    correct = torch.sum(prediction == ground_truth).item()
    return correct / len(ground_truth)


def validate(model, losses, loader):
    model.eval()
    lbl_acc = 0
    is_upp_acc = 0
    loss_acum = 0
    for i, (x, *y, _) in enumerate(loader):
        x_gpu = x.to(device)
        y[1] = y[1].unsqueeze(1).float()
        y_gpu = tuple(targ.to(device) for targ in y)
        
        prediction = model(x_gpu)
        loss_value = sum(
            loss(out, targ) for loss, out, targ in zip(losses, prediction, y_gpu)
        )
        
        loss_acum += loss_value.item()
        lbl = torch.argmax(prediction[0], 1)
        lbl_acc += compute_accuracy(lbl, y_gpu[0])
        is_upp = 0 if prediction[1].item() < 0.5 else 1
        is_upp_acc += compute_accuracy(is_upp, y_gpu[1])
    return loss_acum / i, lbl_acc / i, is_upp_acc / i

def train_model(params: dict):
    model, losses, optimizer, scheduler = build_model(params)
    
    num_epochs = params['num_epochs']
    for epoch in range(num_epochs):
        model.train()
        
        for i, (x, *y, _) in enumerate(train_loader):
            x_gpu = x.to(device)
            y[1] = y[1].unsqueeze(1).float()
            y_gpu = tuple(target.to(device) for target in y)
            
            prediction = model(x_gpu)
            loss_value = sum(
                loss(out, targ) for loss, out, targ in zip(losses, prediction, y_gpu)
            )
            
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
        
        val_loss, lbl_acc, is_upp_acc = validate(model, losses, val_loader)
        if scheduler:
            scheduler.step(val_loss)
    return lbl_acc, is_upp_acc


def objective(trial):
    params = {
        'num_epochs': trial.suggest_int('num_epochs', 30, 30),
        'optimizer': trial.suggest_categorical('optimizer', ('SGD',)),
        'learning_rate': trial.suggest_float('learning_rate', 7e-3, 7e-2, log=True),
        'weight_decay': trial.suggest_float('weight_decay', 1e-3, 7e-3, log=True),
        'scheduler': trial.suggest_categorical('scheduler', ('ReduceLROnPlateau',)),
        'factor': trial.suggest_float('factor', 0.05, 0.2),
        'patience': trial.suggest_int('patience', 2, 4),
    }
    
    lbl_acc, is_upp_acc = train_model(params)
    
    return lbl_acc, is_upp_acc

In [14]:
name = 'Test run for mnist + glyphs and case determination'
study = optuna.create_study(study_name=name, directions=['maximize', 'maximize'])
study.optimize(objective, n_trials=50)

[32m[I 2022-07-24 16:25:40,036][0m A new study created in memory with name: Test run for mnist + glyphs and case determination[0m
[32m[I 2022-07-24 16:26:30,282][0m Trial 0 finished with values: [0.8181818181818182, 0.9696969696969697] and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.021566075664290255, 'weight_decay': 0.0014545889488107417, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.1852627813476765, 'patience': 2}. [0m
[32m[I 2022-07-24 16:27:19,003][0m Trial 1 finished with values: [0.797979797979798, 0.9393939393939394] and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.02370433346337466, 'weight_decay': 0.0038705402738629493, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.0867850350584129, 'patience': 2}. [0m
[32m[I 2022-07-24 16:28:15,869][0m Trial 2 finished with values: [0.7676767676767676, 0.9494949494949495] and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.059584195093264083, 'weight_decay':

[32m[I 2022-07-24 16:47:38,181][0m Trial 25 finished with values: [0.7575757575757576, 0.9393939393939394] and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.013390240964420944, 'weight_decay': 0.006106049854157119, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.12825262997485454, 'patience': 4}. [0m
[32m[I 2022-07-24 16:48:25,358][0m Trial 26 finished with values: [0.7575757575757576, 0.9090909090909091] and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.01769472992744681, 'weight_decay': 0.002246485698365526, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.0766994355634878, 'patience': 3}. [0m
[32m[I 2022-07-24 16:49:12,797][0m Trial 27 finished with values: [0.7272727272727273, 0.9090909090909091] and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.009973956071770818, 'weight_decay': 0.004141540449724739, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.19972520361979107, 'patience': 3}. [0m
[32m[I 2022-07-24 16

In [19]:
best_params = study.best_trials[0].params

In [22]:
with open('../models/best_params.json', 'w') as f:
    json.dump(best_params, f, indent=4)