In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pylab as plt

from modules.utils import load_cifar10
from modules.cnn_with_spectral_pooling import CNN_Spectral_Pool

% matplotlib inline
% load_ext autoreload
% autoreload 2

# Download CIFAR data, if necessary, and load it

In [2]:
# Load the CIFAR-10 training and test arrays through the project helper.
# channels_last=False yields channel-first images (see the shapes printed below).
xtrain, ytrain, xtest, ytest = load_cifar10(
    5,  # presumably the number of training batches to read — confirm in modules.utils
    get_test_data=True,
    channels_last=False,
)

file already downloaded..
getting batch 1
getting batch 2
getting batch 3
getting batch 4
getting batch 5


In [3]:
# Display the shapes of the four loaded arrays as one tuple.
tuple(split.shape for split in (xtrain, ytrain, xtest, ytest))

((50000, 3, 32, 32), (50000,), (10000, 3, 32, 32), (10000,))

# Subsample the data for hyperparameter search

In [4]:
# Draw a random subset of the training set, without replacement, to keep each
# hyperparameter trial cheap. Passing an int to np.random.choice samples from
# np.arange(num_train), so the drawn indices match the explicit-arange form.
sample_size = 1 << 14  # 2 ** 14 == 16384 examples
num_train = xtrain.shape[0]
sampleind = np.random.choice(num_train, size=sample_size, replace=False)
xtrain_sample, ytrain_sample = xtrain[sampleind], ytrain[sampleind]
xtrain_sample.shape, ytrain_sample.shape


((16384, 3, 32, 32), (16384,))

# Define ranges for hyperparameters

In [5]:
# Search bounds for the random hyperparameter search, each given as [low, high].
# learning_rate and l2_norm are sampled log-uniformly by the search cell.
learning_rate_range = [1e-5, 2e-3]
l2_norm_range = [1e-5, 1e-2]
# M is sampled as an integer with both endpoints included (randint up to high + 1).
# NOTE(review): M and gamma are passed straight to CNN_Spectral_Pool; their
# meaning is defined in modules.cnn_with_spectral_pooling — confirm there.
M_range = [3, 9]
# gamma is sampled uniformly between the two bounds.
gamma_range = [0.5, 0.9]


# Perform the hyperparameter search

In [6]:
def _sample_log_uniform(bounds):
    """Return one draw whose natural log is uniform between log(bounds[0]) and log(bounds[1])."""
    log_low, log_high = np.log(bounds[0]), np.log(bounds[1])
    return np.exp(np.random.uniform(low=log_low, high=log_high))


# Random search: 10 independent trials, each training a fresh network on the
# subsample and scoring it on a held-out tail of that same subsample.
validation_size = 2 ** 10
hyperparams, best_accuracies, full_model_names = [], [], []

# The last `validation_size` sampled examples form the validation split;
# everything before them is used for training.
train_x, train_y = xtrain_sample[:-validation_size], ytrain_sample[:-validation_size]
val_x, val_y = xtrain_sample[-validation_size:], ytrain_sample[-validation_size:]

for search_idx in range(10):
    # Draw order matters for the global NumPy RNG stream:
    # learning_rate, l2_norm, M, gamma.
    learning_rate = _sample_log_uniform(learning_rate_range)
    l2_norm = _sample_log_uniform(l2_norm_range)
    # randint excludes `high`, hence the + 1 to make M_range inclusive.
    M = np.random.randint(low=M_range[0], high=M_range[1] + 1)
    gamma = np.random.uniform(low=gamma_range[0], high=gamma_range[1])

    trial = {
        'learning_rate': learning_rate,
        'l2_norm': l2_norm,
        'M': M,
        'gamma': gamma,
    }
    hyperparams.append(trial)

    # Start every trial from an empty TensorFlow graph.
    tf.reset_default_graph()
    # NOTE(review): the reduction schedule lists epochs 31 and 41, but each
    # trial trains for only 30 epochs — confirm how CNN_Spectral_Pool uses it.
    cnn = CNN_Spectral_Pool(
        M=M,
        num_output=10,
        verbose=False,
        learning_rate=learning_rate,
        l2_norm=l2_norm,
        lr_reduction_factor=0.5,
        lr_reduction_epochs=[11, 21, 31, 41],
        gamma=gamma,
    )

    print('Trying hyperparameters: ')
    print(trial)
    cnn.train(
        train_x,
        train_y,
        val_x,
        val_y,
        batch_size=256,
        epochs=30,
        extra_conv_layer=True,
        use_global_averaging=True,
        model_name='hyperparam_search',
    )

    # Record this trial's results so the winner can be reloaded afterwards.
    best_accuracies.append(cnn.best_acc)
    full_model_names.append(cnn.full_model_name)

# Report the trial with the highest recorded accuracy.
best_idx = np.argmax(best_accuracies)
print('Overall best accuracy: {0:.3f}'.format(np.max(best_accuracies)))
print('Full Model Name: {0}'.format(full_model_names[best_idx]))
print('Hyperparameters achieving this result: ')
print(hyperparams[best_idx])

Trying hyperparameters: 
{'l2_norm': 0.0024856655044985746, 'M': 7, 'learning_rate': 0.0014706372136129031, 'gamma': 0.6315427063962382}
Building tf graph...
(?, 10)
number of batches for training: 60 validation: 4
training epoch 1 

	Best validation accuracy! iteration:60 accuracy: 19.140625%

training epoch 2 

	Best validation accuracy! iteration:120 accuracy: 22.16796875%

training epoch 3 

	Best validation accuracy! iteration:180 accuracy: 26.26953125%

training epoch 4 

	Best validation accuracy! iteration:240 accuracy: 32.12890625%

training epoch 5 

	Best validation accuracy! iteration:300 accuracy: 33.3984375%

training epoch 6 

	Best validation accuracy! iteration:360 accuracy: 38.671875%

training epoch 7 

	Best validation accuracy! iteration:420 accuracy: 41.30859375%

training epoch 8 

	Best validation accuracy! iteration:480 accuracy: 42.28515625%

training epoch 9 

	Best validation accuracy! iteration:540 accuracy: 42.67578125%

training epoch 10 

	Best validatio

__Note:__ We performed the hyperparameter search across multiple GPU instances. This notebook shows the parameter search that resulted in the best hyperparameters across all the instances we ran.

In [7]:
# Rebuild the network with the winning trial's hyperparameters and score the
# saved model from that trial on the CIFAR-10 test set.
tf.reset_default_graph()

best_idx = np.argmax(best_accuracies)
overall_best_model = full_model_names[best_idx]
overall_best_hyperparams = hyperparams[best_idx]

cnn = CNN_Spectral_Pool(
    M=overall_best_hyperparams['M'],
    num_output=10,
    verbose=False,
    learning_rate=overall_best_hyperparams['learning_rate'],
    l2_norm=overall_best_hyperparams['l2_norm'],
    lr_reduction_factor=0.5,
    lr_reduction_epochs=[11, 21, 31, 41],
    gamma=overall_best_hyperparams['gamma'],
)

# Saved-model location relative to the project's model directory
# (the restore log shows it resolved under model/).
saved_model_path = 'hyperparam_search/{0}'.format(overall_best_model)
cnn.calc_test_accuracy(xtest, ytest, saved_model_path)

Building tf graph...
(?, 10)
number of batches for testing: 20
Loading pre-trained model
INFO:tensorflow:Restoring parameters from model/hyperparam_search/hyperparam_search_1513530707.2960517
Test accuracy: 63.170
