# Hyperparameter Tuning

## Setup

In [1]:
# Switch the working directory to the parent folder before importing.
# Presumably the project-local modules imported below (models, train,
# evaluation) live there — TODO confirm.
%cd ..

/Users/Niels/Documents/EPFL/Master/DeepLearning/project1


### Imports

In [2]:
import time

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F

import models as models
from train import train
from evaluation import model_tuning

## Baseline

In [3]:
def gen_baseline(params):
    """Create the BaselineCNN model together with its loss and optimizer.

    BaselineCNN is constructed without a hidden-unit argument, so any
    'hidden_units' entry in ``params`` is ignored; only ``params['lr']``
    is read and used as the Adam learning rate.

    Returns:
        Tuple ``(model, criterion, optimizer)``.
    """
    net = models.BaselineCNN()
    loss_fn = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=params['lr'])
    return net, loss_fn, adam

In [4]:
# Settings forwarded to model_tuning for the baseline model.
epochs = 25
rounds = 10
seed = 0

learning_rates = [0.0001, 0.001, 0.01]
batch_sizes = [5, 10, 25]
# Doesn't matter: no MLP after LeNet (gen_baseline never reads the unit count),
# so a single placeholder value is enough.
hidden_layer_units = [10]

# The third returned value is irrelevant for this model. It is bound to a named
# throwaway instead of `_`: in IPython, assigning to `_` shadows the
# output-history variable and stops the shell from updating `_`/`__`/`___`.
batch_size, lr, _unused_hidden_units = model_tuning(
    gen_baseline, train, epochs, rounds, batch_sizes, learning_rates, hidden_layer_units, seed=seed)

batch_size, lr

Testing batch_size 5, lr=0.0001, units=10
    Round 0: 1.1092
    Round 1: 0.7969
    Round 2: 0.7165
    Round 3: 0.7688
    Round 4: 0.7141
    Round 5: 0.6834
    Round 6: 1.2128
    Round 7: 0.7013
    Round 8: 0.7184
    Round 9: 0.9250
  Average: 0.8346
Testing batch_size 5, lr=0.001, units=10
    Round 0: 1.8809
    Round 1: 1.2092
    Round 2: 2.2841
    Round 3: 1.2975
    Round 4: 1.4690
    Round 5: 0.9899
    Round 6: 3.1117
    Round 7: 1.3108
    Round 8: 1.3600
    Round 9: 1.5018
  Average: 1.6415
Testing batch_size 5, lr=0.01, units=10
    Round 0: 0.6871
    Round 1: 0.6862
    Round 2: 0.6938
    Round 3: 0.6946
    Round 4: 0.7018
    Round 5: 0.6874
    Round 6: 0.6833
    Round 7: 0.6924
    Round 8: 0.6842
    Round 9: 0.6942
  Average: 0.6905
Testing batch_size 10, lr=0.0001, units=10
    Round 0: 0.6710
    Round 1: 0.4592
    Round 2: 0.6989
    Round 3: 0.5265
    Round 4: 0.7384
    Round 5: 0.8710
    Round 6: 0.6416
    Round 7: 0.6832
    Round 8: 0.7944


(25, 0.0001)

## Baseline 2

In [5]:
def gen_baseline_2(params):
    """Create a BaselineCNN2 model together with its loss and optimizer.

    Reads two entries from ``params``: 'hidden_units', forwarded to the model
    as ``hidden_layer_units``, and 'lr', used as the Adam learning rate.

    Returns:
        Tuple ``(model, criterion, optimizer)``.
    """
    net = models.BaselineCNN2(hidden_layer_units=params['hidden_units'])
    loss_fn = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=params['lr'])
    return net, loss_fn, adam

In [8]:
# Settings forwarded to model_tuning for BaselineCNN2.
seed = 0
rounds = 10
epochs = 25

# Candidate values for each tuned hyperparameter.
batch_sizes = [10, 25]
learning_rates = [1e-4, 1e-3, 1e-2]
hidden_layer_units = [10, 25, 50, 100]

batch_size, lr, hidden_units = model_tuning(
    gen_baseline_2,
    train,
    epochs,
    rounds,
    batch_sizes,
    learning_rates,
    hidden_layer_units,
    seed=seed,
)

# Display the values returned by model_tuning as the cell output.
(batch_size, lr, hidden_units)

Testing batch_size 10, lr=0.0001, units=10
    Round 0: 0.7136
    Round 1: 0.8627
    Round 2: 0.7815
    Round 3: 0.8811
    Round 4: 0.7991
    Round 5: 0.9208
    Round 6: 1.0042
    Round 7: 0.7632
    Round 8: 0.9754
    Round 9: 0.6918
  Average: 0.8393
Testing batch_size 10, lr=0.0001, units=25
    Round 0: 0.9505
    Round 1: 1.2050
    Round 2: 1.0164
    Round 3: 0.6881
    Round 4: 1.2187
    Round 5: 1.1406
    Round 6: 0.9415
    Round 7: 0.7762
    Round 8: 0.7240
    Round 9: 1.0407
  Average: 0.9702
Testing batch_size 10, lr=0.0001, units=50
    Round 0: 1.3807
    Round 1: 0.9928
    Round 2: 0.9718
    Round 3: 0.8047
    Round 4: 0.9663
    Round 5: 0.6732
    Round 6: 0.6444
    Round 7: 0.9131
    Round 8: 1.0252
    Round 9: 0.8202
  Average: 0.9192
Testing batch_size 10, lr=0.0001, units=100
    Round 0: 0.8572
    Round 1: 1.1305
    Round 2: 1.2486
    Round 3: 1.0130
    Round 4: 0.9079
    Round 5: 0.9733
    Round 6: 0.9500
    Round 7: 0.8811
    Round 8: 

(25, 0.0001, 25)

## Weight Sharing

In [9]:
def gen_weight_sharing(params):
    """Create a WeightSharingCNN model together with its loss and optimizer.

    Reads two entries from ``params``: 'hidden_units', forwarded to the model
    as ``hidden_layer_units``, and 'lr', used as the Adam learning rate.

    Returns:
        Tuple ``(model, criterion, optimizer)``.
    """
    net = models.WeightSharingCNN(hidden_layer_units=params['hidden_units'])
    loss_fn = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=params['lr'])
    return net, loss_fn, adam

In [10]:
# Settings forwarded to model_tuning for the weight-sharing model.
epochs = 25
rounds = 10
seed = 0

learning_rates = [0.0001, 0.001, 0.01]
# Written as a list, matching the batch-size grids of the other tuning cells
# (this was previously a bare tuple).
batch_sizes = [10, 25]
# gen_weight_sharing forwards params['hidden_units'] to WeightSharingCNN,
# so the hidden-layer size is part of the search here.
hidden_layer_units = [10, 25, 50, 100]

batch_size, lr, hidden_units = model_tuning(
    gen_weight_sharing, train, epochs, rounds, batch_sizes, learning_rates, hidden_layer_units, seed=seed)

batch_size, lr, hidden_units

Testing batch_size 10, lr=0.0001, units=10
    Round 0: 0.7591
    Round 1: 0.3877
    Round 2: 0.8379
    Round 3: 0.6004
    Round 4: 0.5764
    Round 5: 0.7948
    Round 6: 0.6878
    Round 7: 0.7023
    Round 8: 0.5114
    Round 9: 0.4330
  Average: 0.6291
Testing batch_size 10, lr=0.0001, units=25
    Round 0: 0.4893
    Round 1: 0.6177
    Round 2: 0.6025
    Round 3: 0.6476
    Round 4: 0.8818
    Round 5: 0.8530
    Round 6: 0.5474
    Round 7: 0.7395
    Round 8: 0.7035
    Round 9: 0.5998
  Average: 0.6682
Testing batch_size 10, lr=0.0001, units=50
    Round 0: 0.5974
    Round 1: 0.4296
    Round 2: 0.7356
    Round 3: 0.5215
    Round 4: 0.6665
    Round 5: 0.4150
    Round 6: 0.8220
    Round 7: 0.7235
    Round 8: 0.7509
    Round 9: 0.8333
  Average: 0.6495
Testing batch_size 10, lr=0.0001, units=100
    Round 0: 0.6128
    Round 1: 0.8416
    Round 2: 0.6294
    Round 3: 0.9614
    Round 4: 0.8297
    Round 5: 0.6966
    Round 6: 0.5451
    Round 7: 0.7425
    Round 8: 

(25, 0.0001, 10)

## Weight Sharing and Auxiliary Loss

In [3]:
from train import train_with_auxiliary_loss

In [4]:
def gen_weight_sharing_aux_loss(params):
    """Create a WeightSharingAuxLossCNN with two losses and an optimizer.

    Reads two entries from ``params``: 'hidden_units', forwarded to the model
    as ``hidden_layer_units``, and 'lr', used as the Adam learning rate.
    Both criteria are separate CrossEntropyLoss instances; presumably the
    second one scores the auxiliary output — verify against the training loop.

    Returns:
        Tuple ``(model, criterion, aux_criterion, optimizer)``.
    """
    net = models.WeightSharingAuxLossCNN(hidden_layer_units=params['hidden_units'])
    main_loss = torch.nn.CrossEntropyLoss()
    aux_loss = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=params['lr'])
    return net, main_loss, aux_loss, adam

### Aux Loss Weight Search

In [5]:
from evaluation import model_tuning_aux_loss

In [6]:
# Settings forwarded to model_tuning_aux_loss.
epochs = 25
rounds = 10
seed = 0

aux_loss_weights = [0.1, 0.5, 1.0, 2.0, 5.0]
learning_rates = [0.0001, 0.001, 0.01]
batch_sizes = [25, 50]
hidden_layer_units = [10, 25, 50, 100]

# model_tuning_aux_loss returns (batch_size, lr, hidden_units, aux_loss_weight):
# the completed run recorded later in this notebook printed (25, 0.001, 50, 5.0),
# and only that ordering is consistent with the candidate lists. Unpack in that
# order so that every name holds the value it describes.
batch_size, lr, hidden_units, aux_loss_weight = model_tuning_aux_loss(
    gen_weight_sharing_aux_loss, train_with_auxiliary_loss, epochs, rounds,
    batch_sizes, learning_rates, hidden_layer_units, aux_loss_weights, seed=seed)

batch_size, lr, hidden_units, aux_loss_weight

Testing aux_weight 0.1, batch_size 25, lr=0.0001, units=10
    Round 0: 0.1950
    Round 1: 0.1200
    Round 2: 0.1150
    Round 3: 0.1800
    Round 4: 0.1850
    Round 5: 0.1850
    Round 6: 0.1350
    Round 7: 0.1850
    Round 8: 0.1900
    Round 9: 0.2050
  Average: 0.1695
Testing aux_weight 0.1, batch_size 25, lr=0.0001, units=25
    Round 0: 0.1800
    Round 1: 0.1750
    Round 2: 0.1700
    Round 3: 0.1450
    Round 4: 0.1850
    Round 5: 0.1750
    Round 6: 0.1600
    Round 7: 0.1800
    Round 8: 0.1500
    Round 9: 0.1400
  Average: 0.1660
Testing aux_weight 0.1, batch_size 25, lr=0.0001, units=50
    Round 0: 0.2000
    Round 1: 0.1600
    Round 2: 0.1100
    Round 3: 0.1250
    Round 4: 0.1750
    Round 5: 0.1650
    Round 6: 0.1900
    Round 7: 0.2200
    Round 8: 0.2350
    Round 9: 0.1100
  Average: 0.1690
Testing aux_weight 0.1, batch_size 25, lr=0.0001, units=100
    Round 0: 0.1800
    Round 1: 0.2050
    Round 2: 0.1600
    Round 3: 0.1600
    Round 4: 0.1450
    Round

KeyboardInterrupt: 

In [8]:
# Settings forwarded to model_tuning_aux_loss (reduced grid: one batch size,
# two learning rates).
epochs = 25
rounds = 10
seed = 0

aux_loss_weights = [0.1, 0.5, 1.0, 2.0, 5.0]
learning_rates = [0.001, 0.01]
batch_sizes = [25]
hidden_layer_units = [10, 25, 50, 100]

# model_tuning_aux_loss returns (batch_size, lr, hidden_units, aux_loss_weight):
# the recorded result of this cell is (25, 0.001, 50, 5.0), and only that
# ordering is consistent with the candidate lists above. Unpack in that order
# so that every name holds the value it describes.
batch_size, lr, hidden_units, aux_loss_weight = model_tuning_aux_loss(
    gen_weight_sharing_aux_loss, train_with_auxiliary_loss, epochs, rounds,
    batch_sizes, learning_rates, hidden_layer_units, aux_loss_weights, seed=seed)

batch_size, lr, hidden_units, aux_loss_weight

Testing aux_weight 0.1, batch_size 25, lr=0.001, units=10
    Round 0: 0.1400
    Round 1: 0.1300
    Round 2: 0.1250
    Round 3: 0.1450
    Round 4: 0.1250
    Round 5: 0.1050
    Round 6: 0.0950
    Round 7: 0.1350
    Round 8: 0.1400
    Round 9: 0.1400
  Average: 0.1280
Testing aux_weight 0.1, batch_size 25, lr=0.001, units=25
    Round 0: 0.1200
    Round 1: 0.1600
    Round 2: 0.1600
    Round 3: 0.1100
    Round 4: 0.1150
    Round 5: 0.1250
    Round 6: 0.1550
    Round 7: 0.1550
    Round 8: 0.1300
    Round 9: 0.1150
  Average: 0.1345
Testing aux_weight 0.1, batch_size 25, lr=0.001, units=50
    Round 0: 0.1250
    Round 1: 0.1600
    Round 2: 0.0750
    Round 3: 0.0950
    Round 4: 0.1050
    Round 5: 0.1100
    Round 6: 0.1600
    Round 7: 0.1850
    Round 8: 0.0850
    Round 9: 0.1500
  Average: 0.1250
Testing aux_weight 0.1, batch_size 25, lr=0.001, units=100
    Round 0: 0.1200
    Round 1: 0.1300
    Round 2: 0.1550
    Round 3: 0.0900
    Round 4: 0.1100
    Round 5: 

    Round 7: 0.1650
    Round 8: 0.0750
    Round 9: 0.0950
  Average: 0.1095
Testing aux_weight 2.0, batch_size 25, lr=0.01, units=50
    Round 0: 0.1150
    Round 1: 0.0900
    Round 2: 0.0950
    Round 3: 0.1150
    Round 4: 0.1250
    Round 5: 0.1500
    Round 6: 0.0800
    Round 7: 0.1450
    Round 8: 0.1350
    Round 9: 0.1200
  Average: 0.1170
Testing aux_weight 2.0, batch_size 25, lr=0.01, units=100
    Round 0: 0.0750
    Round 1: 0.1200
    Round 2: 0.1800
    Round 3: 0.1200
    Round 4: 0.1450
    Round 5: 0.1050
    Round 6: 0.1100
    Round 7: 0.1300
    Round 8: 0.1550
    Round 9: 0.1350
  Average: 0.1275
Testing aux_weight 5.0, batch_size 25, lr=0.001, units=10
    Round 0: 0.1150
    Round 1: 0.0950
    Round 2: 0.1250
    Round 3: 0.1100
    Round 4: 0.1150
    Round 5: 0.0900
    Round 6: 0.1200
    Round 7: 0.0950
    Round 8: 0.1350
    Round 9: 0.1200
  Average: 0.1120
Testing aux_weight 5.0, batch_size 25, lr=0.001, units=25
    Round 0: 0.1450
    Round 1: 0.12

(25, 0.001, 50, 5.0)