In [1]:
import json
from itertools import chain

import optuna

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import SubsetRandomSampler, DataLoader
from torchvision import transforms as T

import numpy as np

%cd ../
from src.data.datasets import HandwritingDataset
from src.models.models import HandwritingClassifier
%cd notebooks/

%matplotlib inline
%load_ext autoreload
%autoreload 2

/home/nazar/Projects/ukrainian_handwriting
/home/nazar/Projects/ukrainian_handwriting/notebooks


In [2]:
device = torch.device('cuda:0')

In [3]:
MEAN = HandwritingClassifier._mean
STD = HandwritingClassifier._std

In [4]:
tf = T.Compose([
    T.RandomRotation(30),
    T.RandomAffine(0, (0.1, 0.1)),
    T.ToTensor(),
    T.Normalize(mean=MEAN, std=STD)
])

In [5]:
train_data = HandwritingDataset(
    '../data/processed/train_data.csv',
    transforms=tf
)

test_data = HandwritingDataset(
    '../data/processed/test_data.csv',
    transforms=T.Compose([
        T.ToTensor(),
        T.Normalize(mean=MEAN, std=STD)
    ])
)

print('Number of samples in training data:', len(train_data))
print('Number of samples in test data:', len(test_data))

Number of samples in training data: 1281
Number of samples in test data: 300


In [6]:
BATCH_SIZE = 64
VAL_SIZE = 100

indices = list(range(len(train_data)))
np.random.seed(42)
np.random.shuffle(indices)
train_indices, val_indices = indices[VAL_SIZE:], indices[:VAL_SIZE]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(train_data, BATCH_SIZE, sampler=train_sampler)
val_loader = DataLoader(train_data, sampler=val_sampler)
test_loader = DataLoader(test_data)

In [7]:
def build_model(params: dict):
#     model = HandwritingClassifier()
#     model.load_state_dict(torch.load('../models/mnist_model.pt'), strict=False)
    
#     # freeze pretrained model
#     for fname, param in model.named_parameters():
#         name = fname.split('.')[0]
#         if name == 'token_classifier' or name == 'is_upp_classifier':
#             continue
#         param.requires_grad = False


    model = torch.load('../models/model_heads.pth')
    
    for param in model.parameters():
        param.requires_grad = True

    model.to(device)
    
    criterion_1 = nn.CrossEntropyLoss().type(torch.cuda.FloatTensor)
    criterion_2 = nn.BCEWithLogitsLoss().type(torch.cuda.FloatTensor)
    losses = (criterion_1, criterion_2)
    
    LR = params['learning_rate']
    REG = params['weight_decay']
    # optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=REG)
#     heads = chain(
#             model.token_classifier.parameters(),
#             model.is_upp_classifier.parameters()
#         )
    optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=REG)
    
    
    factor = params['factor']
    patience = params['patience']
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=factor, patience=patience)
    return model, losses, optimizer, scheduler


def compute_accuracy(prediction, ground_truth):
    correct = torch.sum(prediction == ground_truth).item()
    return correct / len(ground_truth)


def validate(model, losses, loader):
    model.eval()
    lbl_acc = 0
    is_upp_acc = 0
    loss_acum = 0
    for i, (x, *y) in enumerate(loader):
        x_gpu = x.to(device)
        y[1] = y[1].unsqueeze(1).float()
        y_gpu = tuple(targ.to(device) for targ in y)
        
        prediction = model(x_gpu)
        loss_value = sum(
            loss(out, targ) for loss, out, targ in zip(losses, prediction, y_gpu)
        )
        
        loss_acum += loss_value.item()
        lbl = torch.argmax(prediction[0], 1)
        lbl_acc += compute_accuracy(lbl, y_gpu[0])
        is_upp = 0 if prediction[1].item() < 0.5 else 1
        is_upp_acc += compute_accuracy(is_upp, y_gpu[1])
    return loss_acum / i, lbl_acc / i, is_upp_acc / i

def train_model(params: dict):
    model, losses, optimizer, scheduler = build_model(params)
    
    num_epochs = params['num_epochs']
    for epoch in range(num_epochs):
        model.train()
        
        for i, (x, *y) in enumerate(train_loader):
            x_gpu = x.to(device)
            y[1] = y[1].unsqueeze(1).float()
            y_gpu = tuple(target.to(device) for target in y)
            
            prediction = model(x_gpu)
            loss_value = sum(
                loss(out, targ) for loss, out, targ in zip(losses, prediction, y_gpu)
            )
            
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
        
        val_loss, lbl_acc, is_upp_acc = validate(model, losses, val_loader)
        if scheduler:
            scheduler.step(val_loss)
    return val_loss


def objective(trial):
    params = {
        'num_epochs': trial.suggest_int('num_epochs', 20, 20),
        'batch_size': trial.suggest_int('batch_size', 64, 64),
        'optimizer': trial.suggest_categorical('optimizer', ('Adam',)),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 5e-2, log=True),
        'weight_decay': trial.suggest_float('weight_decay', 1e-3, 1e-2, log=True),
        'scheduler': trial.suggest_categorical('scheduler', ('ReduceLROnPlateau',)),
        'factor': trial.suggest_float('factor', 0.1, 0.4),
        'patience': trial.suggest_int('patience', 1, 3),
    }
    
    val_loss = train_model(params)
    
    return val_loss

In [8]:
name = 'Test run for mnist + glyphs and case determination'
study = optuna.create_study(study_name=name)
study.optimize(objective, n_trials=50)

[32m[I 2022-09-04 20:48:08,514][0m A new study created in memory with name: Test run for mnist + glyphs and case determination[0m
[32m[I 2022-09-04 20:48:46,385][0m Trial 0 finished with value: 0.6471148654571593 and parameters: {'num_epochs': 20, 'batch_size': 64, 'optimizer': 'Adam', 'learning_rate': 0.016374941470623364, 'weight_decay': 0.0021391377679174256, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.25915355413417274, 'patience': 1}. Best is trial 0 with value: 0.6471148654571593.[0m
[32m[I 2022-09-04 20:49:23,367][0m Trial 1 finished with value: 0.7570399295802068 and parameters: {'num_epochs': 20, 'batch_size': 64, 'optimizer': 'Adam', 'learning_rate': 0.015107217937173891, 'weight_decay': 0.002335750989092541, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.214428481842945, 'patience': 3}. Best is trial 0 with value: 0.6471148654571593.[0m
[32m[I 2022-09-04 20:50:04,310][0m Trial 2 finished with value: 0.7158659761207122 and parameters: {'num_epochs': 20, 'batch_siz

[32m[I 2022-09-04 21:02:42,679][0m Trial 22 finished with value: 0.8073664940534997 and parameters: {'num_epochs': 20, 'batch_size': 64, 'optimizer': 'Adam', 'learning_rate': 0.009839860488035337, 'weight_decay': 0.001013802807943777, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.3924330960863711, 'patience': 2}. Best is trial 14 with value: 0.453166622933085.[0m
[32m[I 2022-09-04 21:03:19,402][0m Trial 23 finished with value: 0.8966747745965616 and parameters: {'num_epochs': 20, 'batch_size': 64, 'optimizer': 'Adam', 'learning_rate': 0.02444581786501235, 'weight_decay': 0.0014929762711947765, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.34496775477011066, 'patience': 2}. Best is trial 14 with value: 0.453166622933085.[0m
[32m[I 2022-09-04 21:03:56,231][0m Trial 24 finished with value: 0.48420781971218835 and parameters: {'num_epochs': 20, 'batch_size': 64, 'optimizer': 'Adam', 'learning_rate': 0.0035701854265127505, 'weight_decay': 0.0019369326744404184, 'scheduler': 'ReduceL

[32m[I 2022-09-04 21:16:38,434][0m Trial 45 finished with value: 0.5545093628173374 and parameters: {'num_epochs': 20, 'batch_size': 64, 'optimizer': 'Adam', 'learning_rate': 0.001086133044674393, 'weight_decay': 0.001711591925069332, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.2805706286082292, 'patience': 2}. Best is trial 41 with value: 0.4180572386642899.[0m
[32m[I 2022-09-04 21:17:16,233][0m Trial 46 finished with value: 0.5975347953340546 and parameters: {'num_epochs': 20, 'batch_size': 64, 'optimizer': 'Adam', 'learning_rate': 0.0038915943569137255, 'weight_decay': 0.003848222894273991, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.35339981110607294, 'patience': 3}. Best is trial 41 with value: 0.4180572386642899.[0m
[32m[I 2022-09-04 21:17:52,813][0m Trial 47 finished with value: 0.4731298969515289 and parameters: {'num_epochs': 20, 'batch_size': 64, 'optimizer': 'Adam', 'learning_rate': 0.002453734949863882, 'weight_decay': 0.0020321339555709897, 'scheduler': 'Reduce

In [9]:
best_params = study.best_params

In [10]:
best_params

{'num_epochs': 20,
 'batch_size': 64,
 'optimizer': 'Adam',
 'learning_rate': 0.0023955033226241208,
 'weight_decay': 0.001516250075936066,
 'scheduler': 'ReduceLROnPlateau',
 'factor': 0.3278769074801458,
 'patience': 3}

In [12]:
with open('../models/new_best_params.json', 'w') as f:
    json.dump(best_params, f, indent=4)