# Hyperparameter Tuning

## Setup

In [1]:
# Switch the working directory to the parent folder -- presumably the project
# root that holds the local modules imported below (models, train,
# evaluation); TODO confirm.
# NOTE(review): the path is relative, so re-running this cell moves up one
# more level each time.
%cd ..

/Users/Niels/Documents/EPFL/Master/DeepLearning/project1


### Imports

In [2]:
import time

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F

import models as models
from train import train
from evaluation import model_tuning

## Baseline

In [3]:
def gen_baseline(params):
    """Build the baseline CNN together with its loss and optimizer.

    Args:
        params: mapping of hyperparameters. Only ``params['lr']`` is read;
            the baseline has no hidden-layer units to configure, so any
            ``hidden_units`` entry is ignored.

    Returns:
        A ``(model, criterion, optimizer)`` tuple: a ``models.BaselineCNN``
        instance, a cross-entropy loss, and an Adam optimizer over the
        model's parameters.
    """
    net = models.BaselineCNN()
    loss_fn = torch.nn.CrossEntropyLoss()
    return net, loss_fn, torch.optim.Adam(net.parameters(), lr=params['lr'])

In [4]:
# Search grid for the baseline. There is no MLP after LeNet in this model,
# so the hidden-unit axis is a single placeholder value.
learning_rates = [0.0001, 0.001, 0.01]
batch_sizes = [5, 10, 25]
hidden_layer_units = [10]

# Epoch/round counts and the seed handed to model_tuning.
seed = 0
rounds = 10
epochs = 25

# The third returned value is discarded: it corresponds to the placeholder
# hidden-unit axis above.
batch_size, lr, _ = model_tuning(
    gen_baseline,
    train,
    epochs,
    rounds,
    batch_sizes,
    learning_rates,
    hidden_layer_units,
    seed=seed,
)

# Show the selected values as the cell output.
(batch_size, lr)

Testing batch_size 5, lr=0.0001, units=10
    Round 0: 1.1092
    Round 1: 0.7969
    Round 2: 0.7165
    Round 3: 0.7688
    Round 4: 0.7141
    Round 5: 0.6834
    Round 6: 1.2128
    Round 7: 0.7013
    Round 8: 0.7184
    Round 9: 0.9250
  Average: 0.8346
Testing batch_size 5, lr=0.001, units=10
    Round 0: 1.8809
    Round 1: 1.2092
    Round 2: 2.2841
    Round 3: 1.2975
    Round 4: 1.4690
    Round 5: 0.9899
    Round 6: 3.1117
    Round 7: 1.3108
    Round 8: 1.3600
    Round 9: 1.5018
  Average: 1.6415
Testing batch_size 5, lr=0.01, units=10
    Round 0: 0.6871
    Round 1: 0.6862
    Round 2: 0.6938
    Round 3: 0.6946
    Round 4: 0.7018
    Round 5: 0.6874
    Round 6: 0.6833
    Round 7: 0.6924
    Round 8: 0.6842
    Round 9: 0.6942
  Average: 0.6905
Testing batch_size 10, lr=0.0001, units=10
    Round 0: 0.6710
    Round 1: 0.4592
    Round 2: 0.6989
    Round 3: 0.5265
    Round 4: 0.7384
    Round 5: 0.8710
    Round 6: 0.6416
    Round 7: 0.6832
    Round 8: 0.7944


(25, 0.0001)

## Baseline 2

In [5]:
def gen_baseline_2(params):
    """Build the second baseline CNN together with its loss and optimizer.

    Args:
        params: mapping of hyperparameters. ``params['hidden_units']`` is
            forwarded to the model as ``hidden_layer_units`` and
            ``params['lr']`` is used as the Adam learning rate.

    Returns:
        A ``(model, criterion, optimizer)`` tuple: a ``models.BaselineCNN2``
        instance, a cross-entropy loss, and an Adam optimizer over the
        model's parameters.
    """
    hidden = params['hidden_units']
    net = models.BaselineCNN2(hidden_layer_units=hidden)
    loss_fn = torch.nn.CrossEntropyLoss()
    return net, loss_fn, torch.optim.Adam(net.parameters(), lr=params['lr'])

In [None]:
# Search grid for the second baseline: batch size x learning rate x
# hidden-layer units.
hidden_layer_units = [10, 25, 50, 100]
learning_rates = [0.0001, 0.001, 0.01]
batch_sizes = [10, 25]

# Epoch/round counts and the seed handed to model_tuning.
seed = 0
rounds = 10
epochs = 25

batch_size, lr, hidden_units = model_tuning(
    gen_baseline_2,
    train,
    epochs,
    rounds,
    batch_sizes,
    learning_rates,
    hidden_layer_units,
    seed=seed,
)

# Show the selected values as the cell output.
(batch_size, lr, hidden_units)

Testing batch_size 10, lr=0.0001, units=10
    Round 0: 0.7136
    Round 1: 0.8627
    Round 2: 0.7815
    Round 3: 0.8811
    Round 4: 0.7991
    Round 5: 0.9208
    Round 6: 1.0042
    Round 7: 0.7632
    Round 8: 0.9754
    Round 9: 0.6918
  Average: 0.8393
Testing batch_size 10, lr=0.0001, units=25
    Round 0: 0.9505
    Round 1: 1.2050
    Round 2: 1.0164
    Round 3: 0.6881
    Round 4: 1.2187
    Round 5: 1.1406
    Round 6: 0.9415
    Round 7: 0.7762
    Round 8: 0.7240
    Round 9: 1.0407
  Average: 0.9702
Testing batch_size 10, lr=0.0001, units=50
    Round 0: 1.3807
    Round 1: 0.9928
    Round 2: 0.9718
    Round 3: 0.8047
    Round 4: 0.9663
    Round 5: 0.6732
    Round 6: 0.6444
    Round 7: 0.9131
    Round 8: 1.0252
    Round 9: 0.8202
  Average: 0.9192
Testing batch_size 10, lr=0.0001, units=100
    Round 0: 0.8572
    Round 1: 1.1305
    Round 2: 1.2486
    Round 3: 1.0130
    Round 4: 0.9079
    Round 5: 0.9733
    Round 6: 0.9500
    Round 7: 0.8811
    Round 8: 

## Weight Sharing

In [None]:
def gen_weight_sharing(params):
    """Build the weight-sharing CNN together with its loss and optimizer.

    Args:
        params: mapping of hyperparameters. ``params['hidden_units']`` is
            forwarded to the model as ``hidden_layer_units`` and
            ``params['lr']`` is used as the Adam learning rate.

    Returns:
        A ``(model, criterion, optimizer)`` tuple: a
        ``models.WeightSharingCNN`` instance, a cross-entropy loss, and an
        Adam optimizer over the model's parameters.
    """
    hidden = params['hidden_units']
    net = models.WeightSharingCNN(hidden_layer_units=hidden)
    loss_fn = torch.nn.CrossEntropyLoss()
    return net, loss_fn, torch.optim.Adam(net.parameters(), lr=params['lr'])

In [None]:
# Epoch/round counts and the seed handed to model_tuning.
epochs = 25
rounds = 10
seed = 0

# Search grid for the weight-sharing model.
learning_rates = [0.0001, 0.001, 0.01]
# Written as a list, matching the grids passed to model_tuning in the other
# tuning cells.
batch_sizes = [10, 25]
# This model takes hidden_layer_units, so several hidden-layer sizes are
# searched here.
hidden_layer_units = [10, 25, 50, 100]

batch_size, lr, hidden_units = model_tuning(
    gen_weight_sharing, train, epochs, rounds, batch_sizes, learning_rates, hidden_layer_units, seed=seed)

# Show the selected values as the cell output.
batch_size, lr, hidden_units

## Weight Sharing and Auxiliary Loss