# Hyperparameter Tuning

## Setup

In [1]:
# Switch the working directory to the parent folder. The path is relative,
# so re-running this cell moves up one more level each time.
# NOTE(review): presumably done so the local `models`, `train` and
# `evaluation` modules imported below resolve -- confirm.
%cd ..

/Users/Niels/Documents/EPFL/Master/DeepLearning/project1


### Imports

In [2]:
import time

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F

import models as models
from train import train
from evaluation import model_tuning

## Baseline

In [3]:
def gen_baseline(params):
    """Create the baseline CNN with its loss function and optimizer.

    Only ``params['lr']`` is read. ``BaselineCNN`` is constructed without
    arguments, so a ``'hidden_units'`` entry in ``params`` has no effect.

    Returns:
        A ``(model, criterion, optimizer)`` tuple: the fresh network, a
        cross-entropy loss, and an Adam optimizer over the network's
        parameters.
    """
    net = models.BaselineCNN()
    adam = torch.optim.Adam(net.parameters(), lr=params['lr'])
    return net, nn.CrossEntropyLoss(), adam

In [4]:
# Search settings for the baseline model.
seed = 0
epochs = 20

batch_sizes = (5, 10, 25)
learning_rates = [1e-4, 1e-3, 1e-2]
# Single placeholder value: gen_baseline never reads 'hidden_units'.
hidden_layer_units = [10]

# The third returned value (hidden units) is irrelevant for this model.
batch_size, lr, _ = model_tuning(
    gen_baseline,
    train,
    epochs,
    batch_sizes,
    learning_rates,
    hidden_layer_units,
    seed=seed,
)

# Show the batch size and learning rate returned by the search.
(batch_size, lr)

Testing batch_size 5, lr=0.0001, units=10
    Round 0: 0.6015
    Round 1: 0.6970
    Round 2: 0.7694
    Round 3: 0.7049
    Round 4: 0.6547
  Average: 0.6855
Testing batch_size 5, lr=0.001, units=10
    Round 0: 1.3536
    Round 1: 1.1893
    Round 2: 1.0124
    Round 3: 0.9906
    Round 4: 1.2841
  Average: 1.1660
Testing batch_size 5, lr=0.01, units=10
    Round 0: 0.6954
    Round 1: 0.6954
    Round 2: 0.6812
    Round 3: 0.6864
    Round 4: 0.6891
  Average: 0.6895
Testing batch_size 10, lr=0.0001, units=10
    Round 0: 0.5337
    Round 1: 0.5477
    Round 2: 0.6918
    Round 3: 0.4313
    Round 4: 0.6364
  Average: 0.5682
Testing batch_size 10, lr=0.001, units=10
    Round 0: 1.1887
    Round 1: 1.3569
    Round 2: 1.4040
    Round 3: 1.2193
    Round 4: 0.8856
  Average: 1.2109
Testing batch_size 10, lr=0.01, units=10
    Round 0: 0.6112
    Round 1: 0.6704
    Round 2: 0.8019
    Round 3: 0.8272
    Round 4: 0.8827
  Average: 0.7587
Testing batch_size 25, lr=0.0001, units=10


(25, 0.0001)

## Baseline 2

In [5]:
def gen_baseline_2(params):
    """Create a BaselineCNN2 with its loss function and optimizer.

    Reads two entries from ``params``:
        ``'hidden_units'`` -- forwarded to ``BaselineCNN2`` as
            ``hidden_layer_units``.
        ``'lr'`` -- learning rate given to the Adam optimizer.

    Returns:
        A ``(model, criterion, optimizer)`` tuple: the fresh network, a
        cross-entropy loss, and an Adam optimizer over the network's
        parameters.
    """
    width = params['hidden_units']
    net = models.BaselineCNN2(hidden_layer_units=width)
    adam = torch.optim.Adam(net.parameters(), lr=params['lr'])
    return net, nn.CrossEntropyLoss(), adam

In [6]:
# Search settings for the second baseline model.
seed = 0
epochs = 25

batch_sizes = (5, 10, 25)
learning_rates = [1e-4, 1e-3, 1e-2]
# Hidden-layer widths to try; gen_baseline_2 forwards each one to BaselineCNN2.
hidden_layer_units = [10, 25, 50, 100]

batch_size, lr, hidden_units = model_tuning(
    gen_baseline_2,
    train,
    epochs,
    batch_sizes,
    learning_rates,
    hidden_layer_units,
    seed=seed,
)

# Show the batch size, learning rate and hidden units returned by the search.
(batch_size, lr, hidden_units)

Testing batch_size 5, lr=0.0001, units=10
    Round 0: 0.9576
    Round 1: 0.9784
    Round 2: 1.1027
    Round 3: 1.0911
    Round 4: 0.8011
  Average: 0.9862
Testing batch_size 5, lr=0.0001, units=25
    Round 0: 0.7543
    Round 1: 0.8645
    Round 2: 1.3593
    Round 3: 1.4834
    Round 4: 0.8232
  Average: 1.0569
Testing batch_size 5, lr=0.0001, units=50
    Round 0: 0.8524
    Round 1: 0.7718
    Round 2: 1.1971
    Round 3: 1.3385
    Round 4: 1.1664
  Average: 1.0652
Testing batch_size 5, lr=0.0001, units=100
    Round 0: 0.9310
    Round 1: 1.2756
    Round 2: 1.4028
    Round 3: 1.5334
    Round 4: 0.9613
  Average: 1.2208
Testing batch_size 5, lr=0.001, units=10
    Round 0: 1.5645
    Round 1: 0.6819
    Round 2: 3.0743
    Round 3: 2.0154
    Round 4: 0.6720
  Average: 1.6016
Testing batch_size 5, lr=0.001, units=25
    Round 0: 1.2003
    Round 1: 1.0021
    Round 2: 2.9099
    Round 3: 3.3612
    Round 4: 1.3876
  Average: 1.9722
Testing batch_size 5, lr=0.001, units=50


(25, 0.0001, 50)

## Weight Sharing

In [7]:
def gen_weight_sharing(params):
    """Create a WeightSharingCNN with its loss function and optimizer.

    Reads two entries from ``params``:
        ``'hidden_units'`` -- forwarded to ``WeightSharingCNN`` as
            ``hidden_layer_units``.
        ``'lr'`` -- learning rate given to the Adam optimizer.

    Returns:
        A ``(model, criterion, optimizer)`` tuple: the fresh network, a
        cross-entropy loss, and an Adam optimizer over the network's
        parameters.
    """
    width = params['hidden_units']
    net = models.WeightSharingCNN(hidden_layer_units=width)
    adam = torch.optim.Adam(net.parameters(), lr=params['lr'])
    return net, nn.CrossEntropyLoss(), adam

In [8]:
# Search settings for the weight-sharing model.
seed = 0
epochs = 25

batch_sizes = (5, 10, 25)
learning_rates = [1e-4, 1e-3, 1e-2]
# Hidden-layer widths to try; gen_weight_sharing forwards each one to
# WeightSharingCNN.
hidden_layer_units = [10, 25, 50, 100]

batch_size, lr, hidden_units = model_tuning(
    gen_weight_sharing,
    train,
    epochs,
    batch_sizes,
    learning_rates,
    hidden_layer_units,
    seed=seed,
)

# Show the batch size, learning rate and hidden units returned by the search.
(batch_size, lr, hidden_units)

Testing batch_size 5, lr=0.0001, units=10
    Round 0: 0.7791
    Round 1: 0.6100
    Round 2: 1.0758
    Round 3: 0.8873
    Round 4: 0.6716
  Average: 0.8048
Testing batch_size 5, lr=0.0001, units=25
    Round 0: 0.8937
    Round 1: 0.7156
    Round 2: 1.0696
    Round 3: 0.7496
    Round 4: 0.8852
  Average: 0.8627
Testing batch_size 5, lr=0.0001, units=50
    Round 0: 0.8810
    Round 1: 0.8129
    Round 2: 1.1071
    Round 3: 0.9494
    Round 4: 0.8479
  Average: 0.9197
Testing batch_size 5, lr=0.0001, units=100
    Round 0: 0.9652
    Round 1: 1.0826
    Round 2: 1.0080
    Round 3: 0.8521
    Round 4: 1.1544
  Average: 1.0124
Testing batch_size 5, lr=0.001, units=10
    Round 0: 1.2140
    Round 1: 1.1916
    Round 2: 1.4349
    Round 3: 0.6927
    Round 4: 1.6243
  Average: 1.2315
Testing batch_size 5, lr=0.001, units=25
    Round 0: 1.4043
    Round 1: 1.1313
    Round 2: 1.4532
    Round 3: 0.9771
    Round 4: 1.5055
  Average: 1.2943
Testing batch_size 5, lr=0.001, units=50


KeyboardInterrupt: 

## Weight Sharing and Auxiliary Loss