# Artificial Intelligence Nanodegree

## Convolutional Neural Networks

---

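In this notebook, we train an MLP to classify images from the MNIST database, and use a cross-validated grid search to investigate how the score changes with the hyperparameters.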

### 1. Load MNIST Database

In [None]:
from keras.datasets import mnist
import numpy as np

# Fetch the pre-shuffled MNIST database through Keras, then split each
# (images, labels) pair into its own names.
# NOTE: The test pair is held out and reserved for the final evaluation
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

### 2. Rescale the Images by Dividing Every Pixel in Every Image by 255

In [None]:
# Map pixel intensities from [0,255] onto [0,1]: cast each image array to
# float32 first, then divide by the maximum intensity.
x_train, x_test = (
    pixels.astype('float32')/255 for pixels in (x_train, x_test)
)

### 3. Encode Categorical Integer Labels Using a One-Hot Scheme

In [None]:
from keras.utils import np_utils
# One-hot encode the integer class labels into 10-column vectors.
# The encoded copies get their own names so the raw integer labels in
# y_train / y_test stay available.
y_ohe_train = np_utils.to_categorical(y_train, 10)
y_ohe_test = np_utils.to_categorical(y_test, 10)

### 4. Define the Model Architecture Function

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

# Shape of a single input image: the training array's shape with its first
# axis (assumed to index the samples) dropped.
shape = x_train.shape[1:]


# From the tutorial https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/
def create_model(dropout_rate, hidden_layer, nodes, optimizer):
    """Build and compile an MLP classifier with a configurable hidden stack.

    The network flattens each input of shape ``shape`` (module-level), applies
    ``hidden_layer`` repetitions of Dense(relu) followed by Dropout, and ends
    in a 10-unit softmax layer.

    Args:
        dropout_rate: Rate passed to every Dropout layer.
        hidden_layer: Number of Dense + Dropout pairs to stack.
        nodes: Number of units in each hidden Dense layer.
        optimizer: Optimizer passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    model = Sequential()
    model.add(Flatten(input_shape=shape))

    for _ in range(hidden_layer):
        model.add(Dense(nodes, activation='relu'))
        model.add(Dropout(dropout_rate))

    model.add(Dense(10, activation='softmax'))

    # compile the model
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

### 5. Define the Grid Search Function

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

def _as_grid_values(value):
    """Return *value* as a fresh list of candidate settings for the grid.

    A string or any non-iterable is treated as a single candidate and wrapped
    in a one-element list; any other iterable is copied into a new list.
    """
    if isinstance(value, str) or not hasattr(value, '__iter__'):
        return [value]
    return list(value)


def nn_search(data_dict,
              batch_size=[128],
              dropout_rate=[0.2],
              epochs=[10],
              hidden_layers=[2],
              nodes=[512],
              optimizer=['rmsprop']):
    """Grid-search the MLP on the training data and record the mean scores.

    Fits a ``GridSearchCV`` over every combination of the given candidate
    values, using the module-level ``x_train`` / ``y_ohe_train`` arrays and
    ``create_model`` as the model factory, then prints a summary.

    Args:
        data_dict: Dict that accumulates results across calls. It is updated
            in place and also returned.
        batch_size: Candidate batch sizes (a sequence or a single value).
        dropout_rate: Candidate dropout rates (a sequence or a single value).
        epochs: Candidate epoch counts (a sequence or a single value).
        hidden_layers: Candidate hidden-layer counts (a sequence or a single
            value); forwarded to ``create_model`` as ``hidden_layer``.
        nodes: Candidate hidden-layer widths (a sequence or a single value).
        optimizer: Candidate optimizers (a sequence or a single value).

    Returns:
        ``data_dict``, with one entry per evaluated parameter combination.
        Each key is ``tuple(sorted(params.items()))`` (hashable; ``dict(key)``
        recovers the parameters) and each value is the mean test score.

    The list defaults are never mutated: every argument is copied into a new
    list before it is used.
    """
    model = KerasClassifier(build_fn=create_model, verbose=0)

    # GridSearchCV needs a sequence of candidates per parameter, so bare
    # scalars are wrapped. The key must be `hidden_layer` because that is the
    # argument name create_model declares.
    param_grid = dict(batch_size=_as_grid_values(batch_size),
                      dropout_rate=_as_grid_values(dropout_rate),
                      epochs=_as_grid_values(epochs),
                      hidden_layer=_as_grid_values(hidden_layers),
                      nodes=_as_grid_values(nodes),
                      optimizer=_as_grid_values(optimizer))

    # NOTE: 3-fold cross validation, requested explicitly because the
    #       scikit-learn default differs between versions
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=3)
    grid_result = grid.fit(x_train, y_ohe_train)

    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']

    # Each params entry is a dict (unhashable), so store it under a hashable,
    # order-independent key.
    for mean, param in zip(means, params):
        data_dict[tuple(sorted(param.items()))] = mean

    # summarize results
    print("Best: {} using {}".format(grid_result.best_score_, grid_result.best_params_))
    for mean, stdev, param in zip(means, stds, params):
        print("{} ({}) with: {}".format(mean, stdev, param))

    return data_dict

### 6. Run Using the Different Parameters

In [None]:
# NOTE: We do not do a full grid-search as that is too time consuming
#       Instead we would like to investigate how the score changes with the parameters
#       Each sweep varies one parameter and leaves the others at the
#       nn_search defaults. Every candidate is passed as a one-element list
#       because the parameter grid needs a sequence of values per parameter.

data_dict = dict()

# Run with standard parameters
data_dict = nn_search(data_dict)

batch_size = [64, 256]
for bs in batch_size:
    data_dict = nn_search(data_dict, batch_size=[bs])

dropout_rate = [0.1, 0.4]
for dr in dropout_rate:
    data_dict = nn_search(data_dict, dropout_rate=[dr])

epochs = [5, 20]
for e in epochs:
    data_dict = nn_search(data_dict, epochs=[e])

hidden_layers = [1, 3]
for hl in hidden_layers:
    data_dict = nn_search(data_dict, hidden_layers=[hl])

nodes = [256, 1024]
for n in nodes:
    data_dict = nn_search(data_dict, nodes=[n])

# 'sgd' (stochastic gradient descent) is the Keras optimizer identifier
optimizer = ['sgd', 'adadelta']
for o in optimizer:
    data_dict = nn_search(data_dict, optimizer=[o])
    
# TODO: Plot trends
# TODO: Take the best of all worlds, and evaluate it against the final test set

### 8. Calculate the Classification Accuracy on the Test Set (Before Training)

In [None]:
# No earlier cell leaves a `model` in scope (the grid search builds its models
# internally), so build one here with the baseline hyperparameters, i.e. the
# default values of nn_search.
# TODO: replace these with the best hyperparameters found by the searches
model = create_model(dropout_rate=0.2, hidden_layer=2, nodes=512,
                     optimizer='rmsprop')

# evaluate test accuracy
# The model is compiled with categorical_crossentropy, so it is scored against
# the one-hot labels. score[1] is the 'accuracy' metric requested in compile.
score = model.evaluate(x_test, y_ohe_test, verbose=0)
accuracy = 100*score[1]

# print test accuracy
print('Test accuracy: %.4f%%' % accuracy)

### 9. Train the Model

In [None]:
from keras.callbacks import ModelCheckpoint   

# train the model
# The checkpoint file is only overwritten when the monitored quantity
# improves (save_best_only=True), so it ends up holding the best epoch.
checkpointer = ModelCheckpoint(filepath='mnist.model.best.hdf5',
                               verbose=1, save_best_only=True)
# Fit on the rescaled training images and their one-hot labels (the loss is
# categorical_crossentropy); 20% of the training data is used for validation.
hist = model.fit(x_train, y_ohe_train, batch_size=128, epochs=10,
                 validation_split=0.2, callbacks=[checkpointer],
                 verbose=1, shuffle=True)

### 10. Load the Model with the Best Classification Accuracy on the Validation Set

In [None]:
# Restore the weights stored in the checkpoint file written during training.
# NOTE(review): the checkpoint callback does not set `monitor`, so "best"
# presumably means lowest validation loss (the Keras default), not highest
# validation accuracy - confirm against the ModelCheckpoint documentation.
model.load_weights('mnist.model.best.hdf5')

### 11. Calculate the Classification Accuracy on the Test Set

In [None]:
# evaluate test accuracy
# Score against the held-out test images and their one-hot labels, matching
# the categorical_crossentropy loss. score[1] is the 'accuracy' metric
# requested when the model was compiled.
score = model.evaluate(x_test, y_ohe_test, verbose=0)
accuracy = 100*score[1]

# print test accuracy
print('Test accuracy: %.4f%%' % accuracy)