In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pylab as plt

from modules.utils import load_cifar10
from modules.cnn_with_spectral_pooling import CNN_Spectral_Pool

% matplotlib inline
% load_ext autoreload
% autoreload 2

# Download CIFAR data, if necessary, and load it

In [None]:
# Fetch the CIFAR-10 train/test arrays through the project helper.
# NOTE(review): the positional 5 is presumably the number of training
# batches to load -- confirm against modules.utils.load_cifar10.
xtrain, ytrain, xtest, ytest = load_cifar10(
    5,
    get_test_data=True,
    channels_last=False,
)

In [None]:
# Display the shape of every loaded array as a sanity check
# (order: xtrain, ytrain, xtest, ytest).
tuple(split.shape for split in (xtrain, ytrain, xtest, ytest))

# Subsample the data for hyperparameter search

In [None]:
# Number of training examples kept for the hyperparameter search.
sample_size = 2 ** 14

# Use a private, seeded generator so that re-running this cell always picks
# the same subset and the global NumPy random state is left untouched.
subsample_rng = np.random.RandomState(0)

# Passing an int n to choice() samples from range(n); replace=False keeps the
# selected indices unique.
sampleind = subsample_rng.choice(xtrain.shape[0],
                                 size=sample_size, replace=False)
xtrain_sample = xtrain[sampleind]
ytrain_sample = ytrain[sampleind]

# Show the resulting shapes to confirm the subsample size.
xtrain_sample.shape, ytrain_sample.shape


# Define ranges for hyperparameters

In [None]:
# Search-space bounds for the random hyperparameter search.
# Every range is stored as [lower bound, upper bound].
learning_rate_range = [
    1e-5,  # lower bound
    2e-3,  # upper bound
]
l2_norm_range = [
    1e-5,  # lower bound
    1e-2,  # upper bound
]
# Integer-valued range for the `M` argument of CNN_Spectral_Pool.
M_range = [
    3,  # lower bound
    9,  # upper bound
]
# Range for the `gamma` argument of CNN_Spectral_Pool.
gamma_range = [
    0.5,  # lower bound
    0.9,  # upper bound
]


# Perform the hyperparameter search

In [None]:
# Number of examples held out from the end of the subsample for validation.
validation_size = 2 ** 10
# Number of random configurations to try.
num_search_trials = 10
# Private, seeded generator: the sampled configurations are identical on
# every run and independent of the global NumPy random state.
search_rng = np.random.RandomState(0)


def _sample_hyperparams(rng):
    """Draw one random hyperparameter configuration.

    Args:
        rng: a ``np.random.RandomState`` used for all draws.

    Returns:
        dict with keys ``'learning_rate'``, ``'l2_norm'``, ``'M'`` and
        ``'gamma'``. The keys match the keyword arguments passed to
        ``CNN_Spectral_Pool`` in the search loop.
    """
    # Learning rate and L2 coefficient are sampled log-uniformly: draw
    # uniformly between the logs of the bounds, then exponentiate.
    learning_rate = np.exp(rng.uniform(
        low=np.log(learning_rate_range[0]),
        high=np.log(learning_rate_range[1])
    ))
    l2_norm = np.exp(rng.uniform(
        low=np.log(l2_norm_range[0]),
        high=np.log(l2_norm_range[1])
    ))
    # randint excludes `high`, so add 1 to make M_range[1] reachable.
    M = rng.randint(
        low=M_range[0],
        high=M_range[1] + 1
    )
    gamma = rng.uniform(
        low=gamma_range[0],
        high=gamma_range[1]
    )
    return {
        'learning_rate': learning_rate,
        'l2_norm': l2_norm,
        'M': M,
        'gamma': gamma,
    }


# Per-trial records, index-aligned with one another.
hyperparams = []
best_accuracies = []
full_model_names = []
for search_idx in range(num_search_trials):
    params = _sample_hyperparams(search_rng)
    hyperparams.append(params)

    # Clear the default TF graph so each trial builds its model from scratch.
    tf.reset_default_graph()
    cnn = CNN_Spectral_Pool(num_output=10,
                            verbose=False,
                            lr_reduction_factor=0.5,
                            lr_reduction_epochs=[11, 21, 31, 41],
                            **params)
    print('Trying hyperparameters: ')
    print(params)
    # The last `validation_size` examples of the subsample are held out.
    # NOTE(review): positional order is presumably
    # (x_train, y_train, x_val, y_val) -- confirm against
    # CNN_Spectral_Pool.train.
    cnn.train(xtrain_sample[:-validation_size],
              ytrain_sample[:-validation_size],
              xtrain_sample[-validation_size:],
              ytrain_sample[-validation_size:],
              batch_size=256,
              epochs=30,
              extra_conv_layer=True,
              use_global_averaging=True,
              model_name='hyperparam_search'
    )
    best_accuracies.append(cnn.best_acc)
    full_model_names.append(cnn.full_model_name)

# Locate the winning trial once and reuse the index for every report line.
best_idx = int(np.argmax(best_accuracies))
print('Overall best accuracy: {0:.3f}'.format(best_accuracies[best_idx]))
print('Full Model Name: {0}'.format(full_model_names[best_idx]))
print('Hyperparameters achieving this result: ')
print(hyperparams[best_idx])

__Note:__ in the above output, the training accuracy can differ from the validation accuracy even when both are computed on the same data, because dropout is applied during training but not during validation.

In [None]:
# Pick out the trial with the highest recorded accuracy from the search.
overall_best_hyperparams = hyperparams[np.argmax(best_accuracies)]
overall_best_model = full_model_names[np.argmax(best_accuracies)]

# Clear the default TF graph, then rebuild the network with the winning
# configuration. The dict keys ('learning_rate', 'l2_norm', 'M', 'gamma')
# match the constructor's keyword names, so the dict is unpacked directly.
tf.reset_default_graph()
cnn = CNN_Spectral_Pool(
    num_output=10,
    verbose=False,
    lr_reduction_factor=0.5,
    lr_reduction_epochs=[11, 21, 31, 41],
    **overall_best_hyperparams
)

# Evaluate on the test split.
# NOTE(review): the third argument is presumably the saved-model path of the
# best trial -- confirm against CNN_Spectral_Pool.calc_test_accuracy.
cnn.calc_test_accuracy(
    xtest,
    ytest,
    'hyperparam_search/{0}'.format(overall_best_model),
)