In [73]:
from os import getcwd
from os import path
from copy import deepcopy

import joblib
import numpy as np
import optuna
from optuna.trial import TrialState
import pickle
import plotly.express as px
import plotly.offline as pyo
import torch
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

from src.data.load_dataset import load_mnist, load_kmnist
from src.models.networks import V1_mnist_RFNet, classical_RFNet
from src.models.utils import train, test 

In [3]:
# Absolute path of the directory two levels above the current working directory.
# Later cells append '/models/results/...' to it when saving/loading the Optuna study.
data_dir = path.abspath(path.join(getcwd(), '../../'))
# Initialise plotly's offline notebook mode before any figure is shown.
pyo.init_notebook_mode(connected=True)

In [4]:
# Use the GPU only when CUDA is actually usable, otherwise fall back to the CPU.
# `torch.cuda.is_available` is a function and has to be *called*: the bare function
# object is always truthy, which would select 'cuda' even on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 256   # train batch size handed to load_mnist by the objective functions
epochs = 10        # the objective functions iterate over range(epochs + 1)
log_interval = 10  # forwarded as the first argument of train()

In [6]:
# Build a V1_mnist_RFNet and move it to the selected device.
# NOTE(review): the positional arguments are presumably (hidden size, size, frequency),
# judging by how the hyperparameter objective names them -- confirm against V1_mnist_RFNet.
V1_RFNet = V1_mnist_RFNet(100, 5.0, 2.0, center=None).to(device)

In [None]:
def V1_RFNet_objective(trial):
    """Optuna objective: validation accuracy of a V1_mnist_RFNet trained on MNIST.

    Samples the "size" and "frequency" hyperparameters uniformly from [0.01, 10],
    builds ``V1_mnist_RFNet(100, size, frequency, center=None)`` and trains it with
    Adam at a fixed learning rate, reporting the validation result to Optuna after
    every epoch so that unpromising trials can be pruned.

    Reads the module-level ``batch_size``, ``device``, ``epochs`` and ``log_interval``.

    Args:
        trial: the ``optuna.trial.Trial`` supplying the hyperparameter suggestions.

    Returns:
        The value returned by ``test()`` on the validation loader after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when ``trial.should_prune()`` is true.
    """
    # load the data
    train_loader, val_loader, _ = load_mnist(train_batch_size=batch_size, train_percentage=0.2)

    # sample the hyperparameters under study and build the model
    # (suggest_float replaces the deprecated suggest_uniform; same uniform range)
    s = trial.suggest_float("size", 0.01, 10)
    f = trial.suggest_float("frequency", 0.01, 10)
    model = V1_mnist_RFNet(100, s, f, center=None).to(device)

    # optimizer, learning rate and loss are fixed; only size/frequency are searched
    lr = 0.0031485838088746586
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = F.cross_entropy

    # train and validate; note the loop runs for epochs 0..epochs inclusive
    for epoch in range(epochs + 1):
        train(log_interval, device, model, train_loader, optimizer, epoch, loss_fn, verbose=False)
        val_accuracy = test(model, device, val_loader, loss_fn, verbose=False)

        trial.report(val_accuracy, epoch)

        # prune if unpromising trial
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_accuracy

In [None]:
# Random search over the V1 network hyperparameters, maximising the objective.
v1_sampler = optuna.samplers.RandomSampler()
v1_study = optuna.create_study(sampler=v1_sampler, direction='maximize')
v1_study.optimize(V1_RFNet_objective, n_trials=50)

# Persist the finished study so it can be reloaded later (e.g. for plotting).
joblib.dump(v1_study, data_dir + '/models/results/mnist_clf/mnist_param_study.pkl')

# Split the trials by their final state for the summary printed below.
pruned_trials = v1_study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = v1_study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
for label, count in (
    ("  Number of finished trials: ", len(v1_study.trials)),
    ("  Number of pruned trials: ", len(pruned_trials)),
    ("  Number of complete trials: ", len(complete_trials)),
):
    print(label, count)

# Report the best trial's objective value and the parameters that produced it.
print("Best trial:")
trial = v1_study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
# Reload the saved parameter study from disk and draw its contour plot.
study_path = data_dir + '/models/results/mnist_clf/mnist_param_study.pkl'
with open(study_path, 'rb') as file:
    v1_study = joblib.load(file)

fig = optuna.visualization.plot_contour(v1_study)
fig.show()

In [33]:
def _build_v1_network(n_units):
    """Build a V1_mnist_RFNet(n_units, 5.34, 1.965, None) on `device` and print 'Done'."""
    net = V1_mnist_RFNet(n_units, 5.34, 1.965, None).to(device)
    print('Done')
    return net


# Same two trailing hyperparameters for every network; only the first argument varies.
network_500 = _build_v1_network(500)
network_600 = _build_v1_network(600)
network_1000 = _build_v1_network(1000)

Done
Done
Done


In [348]:
# Train a copy of the 1000-unit V1 network on KMNIST with fixed hyperparameters.
hidden_size = 1000
s = 5.34
f = 1.965
c = None
lr = 0.0031485838088746586
num_epochs = 10
log_interval = 100

# Model, optimizer, LR schedule and loss. Training operates on a deep copy,
# so the network_1000 object itself is left unmodified.
model = deepcopy(network_1000)
optimizer = optim.Adam(model.parameters(), lr=lr)
# StepLR multiplies the learning rate by 0.1 every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
loss_fn = F.cross_entropy

# Data loaders.
train_batch_size = 64
train_percentage = 0.01
train_loader, val_loader, test_loader = load_kmnist(train_batch_size, train_percentage)

for epoch in range(num_epochs):
    _ = train(log_interval, device, model, train_loader, optimizer, epoch, loss_fn, verbose=True)

    # Evaluate on the test loader after every 10th epoch.
    is_eval_epoch = (epoch + 1) % 10 == 0
    if is_eval_epoch:
        test_accuracy = test(model, device, test_loader, loss_fn, verbose=True)

    scheduler.step()


Test set: Average loss: 27.567228. Accuracy: 6570/10000 (65.70%)



In [313]:
# Test error: 100 minus the value returned by test().
# NOTE(review): assumes test() returns accuracy as a percentage -- confirm in src.models.utils.
# NOTE(review): the stored output of this cell comes from an earlier execution than the
# training cell above (execution count 313 vs. 348) and does not match its 65.70% accuracy;
# re-run before quoting the number.
100 - test_accuracy

15.030000000000001

### classical network

In [None]:
inp_size = (1, 28, 28)
hidden_size = 500
def classical_RFNet_objective(trial):
    """Optuna objective: validation accuracy of a classical_RFNet trained on MNIST.

    Samples the learning rate (log-uniform in [1e-5, 1e-1]) and the optimizer class
    (Adam, RMSprop or SGD), trains ``classical_RFNet(inp_size, hidden_size)`` and
    reports the validation result to Optuna after every epoch so that unpromising
    trials can be pruned.

    Reads the module-level ``inp_size``, ``hidden_size``, ``batch_size``, ``device``,
    ``epochs`` and ``log_interval`` at call time.

    Args:
        trial: the ``optuna.trial.Trial`` supplying the hyperparameter suggestions.

    Returns:
        The value returned by ``test()`` on the validation loader after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when ``trial.should_prune()`` is true.
    """
    # load the data
    train_loader, val_loader, _ = load_mnist(train_batch_size=batch_size, train_percentage=0.2)

    # load the model
    model = classical_RFNet(inp_size, hidden_size).to(device)

    # generate the optimizer, the learning rate and the loss function
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    # Every other train()/test() call in this notebook passes the loss function
    # positionally; pass it here too so this objective uses the same cross-entropy loss.
    loss_fn = F.cross_entropy

    # train and validate; note the loop runs for epochs 0..epochs inclusive
    for epoch in range(epochs + 1):
        train(log_interval, device, model, train_loader, optimizer, epoch, loss_fn, verbose=False)
        val_accuracy = test(model, device, val_loader, loss_fn, verbose=False)

        trial.report(val_accuracy, epoch)

        # prune if unpromising trial
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_accuracy

In [None]:
# TPE-driven search over the classical network's optimizer settings, maximising the objective.
classical_sampler = optuna.samplers.TPESampler()
classical_study = optuna.create_study(sampler=classical_sampler, direction='maximize')
classical_study.optimize(classical_RFNet_objective, n_trials=50)

# Split the trials by their final state for the summary printed below.
pruned_trials = classical_study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = classical_study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
for label, count in (
    ("  Number of finished trials: ", len(classical_study.trials)),
    ("  Number of pruned trials: ", len(pruned_trials)),
    ("  Number of complete trials: ", len(complete_trials)),
):
    print(label, count)

# Report the best trial's objective value and the parameters that produced it.
print("Best trial:")
trial = classical_study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [350]:
# Train a 1000-unit classical network on KMNIST with fixed hyperparameters.
inp_size = (1, 28, 28)
hidden_size = 1000
lr = 0.01922083004518646
num_epochs = 10
log_interval = 100

# Model, optimizer, LR schedule and loss.
model = classical_RFNet(inp_size, hidden_size).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
# StepLR multiplies the learning rate by 0.1 every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
loss_fn = F.cross_entropy

# Data loaders.
train_batch_size = 64
train_percentage = 0.01
train_loader, val_loader, test_loader = load_kmnist(train_batch_size, train_percentage)

for epoch in range(num_epochs):
    _ = train(log_interval, device, model, train_loader, optimizer, epoch, loss_fn, verbose=True)

    # Evaluate on the test loader after every 10th epoch.
    is_eval_epoch = (epoch + 1) % 10 == 0
    if is_eval_epoch:
        test_accuracy = test(model, device, test_loader, loss_fn, verbose=True)

    scheduler.step()


Test set: Average loss: 1.818231. Accuracy: 6127/10000 (61.27%)



In [298]:
# Test error: 100 minus the value returned by test().
# NOTE(review): assumes test() returns accuracy as a percentage -- confirm in src.models.utils.
# NOTE(review): the stored output of this cell comes from an earlier execution than the
# training cell above (execution count 298 vs. 350) and does not match its 61.27% accuracy;
# re-run before quoting the number.
100 - test_accuracy

11.86

In [343]:
# Show the learning rate(s) most recently computed by the StepLR scheduler.
scheduler.get_last_lr()

[0.00019220830045186461]