In [1]:
import json

import optuna

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import SubsetRandomSampler, DataLoader
from torchvision import transforms as T

import numpy as np
%cd ../
from src.data.datasets import HandwritingDataset
from src.models import HandwritingClassifier
%cd notebooks/

%matplotlib inline
%load_ext autoreload
%autoreload 2

/home/nazar/Projects/ukrainian_handwriting
/home/nazar/Projects/ukrainian_handwriting/notebooks


In [2]:
# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still
# runs (slowly) on machines without a usable GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Normalisation statistics are read from the classifier class so the
# transforms below use the same values the model class declares.
MEAN, STD = HandwritingClassifier._mean, HandwritingClassifier._std

In [4]:
# Training-time pipeline: random rotation of up to 30 degrees, a random
# translation of up to 10% along each axis (no extra rotation), then
# tensor conversion and normalisation with MEAN / STD.
tf = T.Compose([
    T.RandomRotation(degrees=30),
    T.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    T.ToTensor(),
    T.Normalize(mean=MEAN, std=STD),
])

In [5]:
# Training split: every sample goes through the augmenting pipeline `tf`.
train_data = HandwritingDataset('../data/processed/train_data.csv', transforms=tf)

# Test split: deterministic preprocessing only (tensor conversion + normalisation).
test_data = HandwritingDataset(
    '../data/processed/test_data.csv',
    transforms=T.Compose([T.ToTensor(), T.Normalize(mean=MEAN, std=STD)]),
)

# Report the dataset sizes as a quick sanity check.
print('Number of samples in training data:', len(train_data))
print('Number of samples in test data:', len(test_data))

Number of samples in training data: 1281
Number of samples in test data: 300


In [6]:
BATCH_SIZE = 64
VAL_SIZE = 100

# Shuffle the sample indices with a fixed seed so the train/validation split
# is the same on every run. Note that this seeds NumPy's global RNG.
indices = list(range(len(train_data)))
np.random.seed(42)
np.random.shuffle(indices)

# The first VAL_SIZE shuffled indices are held out for validation;
# everything after them is used for training.
val_indices = indices[:VAL_SIZE]
train_indices = indices[VAL_SIZE:]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# NOTE(review): the validation loader draws from `train_data`, so validation
# samples also pass through the random augmentations in `tf` -- confirm this
# is intended. The validation and test loaders use DataLoader's default
# batch size.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=train_sampler)
val_loader = DataLoader(train_data, sampler=val_sampler)
test_loader = DataLoader(test_data)

In [12]:
def build_model(params: dict, num_classes: int = 43,
                weights_path: str = '../models/mnist_model.pt'):
    """Create the classifier and its training objects for one trial.

    The network is initialised from the checkpoint at ``weights_path``
    (presumably MNIST-pretrained, judging by the file name), its final
    ``class_fc`` layer is replaced by a fresh linear layer with
    ``num_classes`` outputs, and everything is moved to the notebook-level
    ``device``.

    Args:
        params: Hyperparameters. Reads ``'learning_rate'``,
            ``'weight_decay'``, ``'factor'`` and ``'patience'``; any other
            keys are ignored.
        num_classes: Number of outputs of the replacement ``class_fc`` layer.
        weights_path: Path of the state-dict checkpoint to load.

    Returns:
        Tuple ``(model, criterion, optimizer, scheduler)``: the model on
        ``device``, a cross-entropy loss, an SGD optimizer (momentum 0.9)
        and a ``ReduceLROnPlateau`` scheduler bound to that optimizer.

    Raises:
        KeyError: If one of the required keys is missing from ``params``.
    """
    model = HandwritingClassifier()
    # Load onto the CPU first so a checkpoint saved from a GPU can be
    # restored on any machine; the model is moved to `device` below.
    state_dict = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(state_dict)

    # Swap the classification head for one sized for the new label set.
    num_features = model.class_fc.in_features
    model.class_fc = nn.Linear(num_features, num_classes)

    # Follow the selected `device` instead of hard-coding a CUDA tensor type;
    # float32 matches the previous torch.cuda.FloatTensor cast.
    model.to(device=device, dtype=torch.float32)
    criterion = nn.CrossEntropyLoss().to(device)

    optimizer = optim.SGD(
        model.parameters(),
        lr=params['learning_rate'],
        momentum=0.9,
        weight_decay=params['weight_decay'],
    )
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=params['factor'],
        patience=params['patience'],
    )
    return model, criterion, optimizer, scheduler

def train_model(params: dict):
    """Train a freshly built model and return its final validation accuracy.

    Builds the model, loss, optimizer and scheduler with ``build_model``,
    then alternates one pass over ``train_loader`` with one evaluation pass
    over ``val_loader`` for each epoch. After every epoch the scheduler (if
    any) is stepped with the mean validation loss per batch.

    Args:
        params: Hyperparameters forwarded to ``build_model``. The optional
            ``'num_epochs'`` key sets the number of epochs (default 30).

    Returns:
        Fraction of correctly classified validation samples measured after
        the last epoch (``0.0`` if no epoch was run).

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    model, criterion, optimizer, scheduler = build_model(params)

    num_epochs = params.get('num_epochs', 30)
    accuracy = 0.0

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        for x, y, _ in train_loader:
            x_gpu = x.to(device)
            y_gpu = y.to(device)

            loss_value = criterion(model(x_gpu), y_gpu)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()

        # ---- validation pass ----
        loss_acum = 0.0
        correct_samples = 0
        total_samples = 0
        num_batches = 0

        model.eval()
        # No gradients are needed for evaluation; skipping them saves
        # memory and time without changing the computed values.
        with torch.no_grad():
            for x, y, _ in val_loader:
                x_gpu = x.to(device)
                y_gpu = y.to(device)

                logits = model(x_gpu)
                loss_acum += criterion(logits, y_gpu).item()
                prediction = torch.argmax(logits, 1)

                correct_samples += torch.sum(prediction == y_gpu).item()
                total_samples += y.shape[0]
                num_batches += 1

        if num_batches == 0:
            raise ValueError('val_loader produced no batches')

        if scheduler is not None:
            # Divide by the number of batches, not the last batch index,
            # so the scheduler sees the true mean validation loss.
            scheduler.step(loss_acum / num_batches)
        accuracy = correct_samples / total_samples
    return accuracy


def objective(trial):
    """Optuna objective: sample hyperparameters, train, return the score.

    Every hyperparameter is drawn through ``trial`` so it is recorded with
    the trial. ``num_epochs``, ``optimizer`` and ``scheduler`` have a single
    possible value, so only the learning rate, weight decay, scheduler
    factor and scheduler patience actually vary between trials.

    Args:
        trial: The Optuna trial used to suggest hyperparameter values.

    Returns:
        The value returned by ``train_model`` for the sampled
        hyperparameters (the study in this notebook maximises it).
    """
    hyperparams = {}

    # Fixed choices, still suggested so they show up in the trial's params.
    hyperparams['num_epochs'] = trial.suggest_int('num_epochs', 30, 30)
    hyperparams['optimizer'] = trial.suggest_categorical('optimizer', ('SGD',))

    # Optimizer settings, sampled on a log scale.
    hyperparams['learning_rate'] = trial.suggest_float('learning_rate', 7e-3, 7e-2, log=True)
    hyperparams['weight_decay'] = trial.suggest_float('weight_decay', 1e-3, 7e-3, log=True)

    # Learning-rate scheduler settings.
    hyperparams['scheduler'] = trial.suggest_categorical('scheduler', ('ReduceLROnPlateau',))
    hyperparams['factor'] = trial.suggest_float('factor', 0.05, 0.2)
    hyperparams['patience'] = trial.suggest_int('patience', 2, 4)

    return train_model(hyperparams)

In [13]:
# Run the hyperparameter search: 50 trials, keeping the trial with the
# highest value returned by `objective`. The study lives in memory only.
name = 'Test run for mnist + glyphs'
study = optuna.create_study(
    direction='maximize',
    study_name=name,
)
study.optimize(objective, n_trials=50)

[32m[I 2022-07-18 19:10:21,757][0m A new study created in memory with name: Test run for mnist + glyphs[0m
[32m[I 2022-07-18 19:11:11,388][0m Trial 0 finished with value: 0.76 and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.02065462907105721, 'weight_decay': 0.0020828018319538504, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.18435994273331363, 'patience': 2}. Best is trial 0 with value: 0.76.[0m
[32m[I 2022-07-18 19:12:01,763][0m Trial 1 finished with value: 0.76 and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.018676047229254967, 'weight_decay': 0.003245110318934138, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.18442744604372996, 'patience': 3}. Best is trial 0 with value: 0.76.[0m
[32m[I 2022-07-18 19:12:55,078][0m Trial 2 finished with value: 0.79 and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.04564540644369428, 'weight_decay': 0.002689645232801303, 'scheduler': 'ReduceLROnPlateau', 'factor': 

[32m[I 2022-07-18 19:32:09,445][0m Trial 26 finished with value: 0.67 and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.024954639877389183, 'weight_decay': 0.0030101471337972505, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.16544544868244854, 'patience': 4}. Best is trial 13 with value: 0.85.[0m
[32m[I 2022-07-18 19:32:54,893][0m Trial 27 finished with value: 0.81 and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.041914495323878115, 'weight_decay': 0.0016577512760907217, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.1107172553115156, 'patience': 3}. Best is trial 13 with value: 0.85.[0m
[32m[I 2022-07-18 19:33:40,610][0m Trial 28 finished with value: 0.77 and parameters: {'num_epochs': 30, 'optimizer': 'SGD', 'learning_rate': 0.03901074399753764, 'weight_decay': 0.0023984748389947108, 'scheduler': 'ReduceLROnPlateau', 'factor': 0.1287751753633695, 'patience': 4}. Best is trial 13 with value: 0.85.[0m
[32m[I 2022-07-18 19:34:25,

In [14]:
# Hyperparameters of the best trial found by the study.
best_params = study.best_trial.params

In [18]:
# Persist the best hyperparameters as JSON next to the model checkpoints.
with open('../models/best_params.json', 'w') as f:
    f.write(json.dumps(best_params))