In [14]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
import keras.backend as K

from cifar_model import get_cifar10_cnn

## CIFARNET Convolutional Neural Network Experiments

Here, I tune and train CNN models, to recreate the empirical results of section 5 of the paper.

As specified in the paper, I fix the parameter $\beta_1$ at .9, and tune the learning rate $\alpha$ and the hyperparameter $\beta_2$ using a gridsearch.

The authors further specified that the number of hidden units is 100, and that they use the ReLU activation function. I'll do the same.

## 0. Load CIFAR Dataset

I've already created train and test splits for the CIFAR-10 dataset. They are conveniently stored as NumPy `.npy` arrays.

In [44]:
# Load the pre-built CIFAR train/test splits (features and labels) from disk.
# The files are read in the order listed and unpacked into the four names.
X_train, X_test, y_train, y_test = (
    np.load(npy_path)
    for npy_path in (
        "../../data/CIFAR/X_train.npy",
        "../../data/CIFAR/X_test.npy",
        "../../data/CIFAR/y_train.npy",
        "../../data/CIFAR/y_test.npy",
    )
)

## 1. A framework for exhaustive gridsearch

The hyperparameters that I'll need to tune by gridsearch are: 

- $\beta_2$
- $\alpha$.

To do so in a neat fashion, and make use of all my cores (CPU training :( ) , I'll use the `GridSearchCV` class from `sklearn`, with the `KerasClassifier` wrapper.

The interface of this wrapper requires that I define a function that can be called with a set of hyperparameter options and create a `Sequential` model that can be compiled and trained.

The function that does this is in the file `cifar_model.py`

Note the hyperparameters that I do not tune, as they are fixed by the authors:

- $\beta_1 = .9$
- Learning-rate (step-size) decay: $\alpha_t = \frac{\alpha}{\sqrt{t}}$
- Batch size = 128

In [8]:
# Example of how this works: build the CNN without passing any
# hyperparameters, then print its layer-by-layer summary.
example_model = get_cifar10_cnn()
example_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 32, 32, 64)        6976      
_________________________________________________________________
activation_9 (Activation)    (None, 32, 32, 64)        0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 16, 16, 64)        256       
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 16, 16, 64)        147520    
_________________________________________________________________
activation_10 (Activation)   (None, 16, 16, 64)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 16384)             0         
__________

## 2. Reshaping data

The input needs to be of dimension (height, width, depth) or (depth, height, width), depending on the Keras settings. 

In [48]:
def _as_image_batch(images, data_format):
    """Return `images` as a 4-D batch laid out for the given Keras data format.

    The array is viewed as (n, 3, 32, 32) and, for "channels_last", transposed
    to (n, 32, 32, 3). Any other `data_format` keeps the (n, 3, 32, 32) layout.

    If `images` already has the target shape it is returned untouched. Without
    this guard, re-running the cell would reshape/transpose the converted array
    a second time -- which succeeds silently (same element count) but scrambles
    the pixels.

    NOTE(review): assumes each stored row is channel-major (all of one channel,
    then the next), as implied by the reshape to (n, 3, 32, 32) -- confirm
    against how the .npy files were written.
    """
    n_images = images.shape[0]
    if data_format == "channels_last":
        target_shape = (n_images, 32, 32, 3)
    else:
        target_shape = (n_images, 3, 32, 32)

    if images.shape == target_shape:
        # Already converted (e.g. the cell was executed twice): nothing to do.
        return images

    batch = images.reshape(n_images, 3, 32, 32)
    if data_format == "channels_last":
        # Move the channel axis to the end: (n, C, H, W) -> (n, H, W, C).
        batch = batch.transpose(0, 2, 3, 1)
    return batch


trainlength, testlength = X_train.shape[0], X_test.shape[0]
# Reshape the X's, according to our channel setting.
X_train = _as_image_batch(X_train, K.image_data_format())
X_test = _as_image_batch(X_test, K.image_data_format())

In [49]:
# Show the shapes of the train and test inputs, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(50000, 32, 32, 3)
(10000, 32, 32, 3)


## 3. Set up gridsearches

In [50]:
"""
Make a hyperparemter grid to search through
"""
beta2_range = np.append(np.arange(.990, .999, .0025), .999)
alpha_range = [.0001*10**i for i in range(5)]
param_grid = dict(lr=alpha_range, beta_2=beta2_range)

In [51]:
# Display the grid so the candidate values can be checked before the search.
param_grid

{'beta_2': array([0.99  , 0.9925, 0.995 , 0.9975, 0.999 ]),
 'lr': [0.0001, 0.001, 0.01, 0.1, 1.0]}

In [57]:
# Grid search over the learning-rate / beta_2 combinations in `param_grid`.
# Each candidate is built via `get_cifar10_cnn` and trained for 50 epochs
# with mini-batches of 128.
# NOTE(review): `amsgrad` is not set here, so this presumably trains plain
# Adam -- confirm against the default in `get_cifar10_cnn`.
adam_model = KerasClassifier(build_fn=get_cifar10_cnn, epochs=50, batch_size=128, verbose=2)

# cv=3 is pinned explicitly: it matches the recorded "Fitting 3 folds" output,
# whereas the library default changed from 3 to 5 folds in scikit-learn 0.22.
adam_grid = GridSearchCV(
    estimator=adam_model,
    param_grid=param_grid,
    cv=3,
    n_jobs=1,
    verbose=1,
)
adam_grid_result = adam_grid.fit(X_train, y_train)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
Epoch 1/50


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-57-3e7bceec9fae>", line 3, in <module>
    adam_grid_result = adam_grid.fit(X_train, y_train)
  File "/usr/local/lib/python3.6/site-packages/sklearn/model_selection/_search.py", line 639, in fit
    cv.split(X, y, groups)))
  File "/usr/local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 779, in __call__
    while self.dispatch_one_batch(iterator):
  File "/usr/local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 625, in dispatch_one_batch
    self._dispatch(tasks)
  File "/usr/local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 588, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/usr/local/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py", l

KeyboardInterrupt: 

In [58]:
"""
Gridsearch through learning rate and beta_2 combinations, using the AMSGrad optimizer
"""
# Same grid, but now using AMS optimizer
param_grid_ams = dict(lr=alpha_range, beta_2=beta2_range, amsgrad = [True])

ams_model = KerasClassifier(build_fn=get_cifar10_cnn, epochs=50, batch_size=128, verbose=2)
ams_grid = GridSearchCV(estimator=ams_model, param_grid=param_grid_ams, n_jobs=-1, verbose = 1)
ams_grid_result = ams_grid.fit(X_train, y_train)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-58-01bdeb76b132>", line 9, in <module>
    ams_grid_result = ams_grid.fit(X_train, y_train)
  File "/usr/local/lib/python3.6/site-packages/sklearn/model_selection/_search.py", line 639, in fit
    cv.split(X, y, groups)))
  File "/usr/local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 789, in __call__
    self.retrieve()
  File "/usr/local/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 699, in retrieve
    self._output.extend(job.get(timeout=self.timeout))
  File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 638, in get
    self.wait(timeout)
  File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocess

KeyboardInterrupt: 