# Artificial Intelligence Nanodegree

## Convolutional Neural Networks

---

In this notebook, we train an MLP to classify images from the MNIST database.

### 1. Load MNIST Database

In [None]:
%matplotlib notebook

import matplotlib.pyplot as plt
from keras.datasets import mnist
import numpy as np

# Flag read by the TensorFlow session-setup cell further down: when True, that
# cell installs a session with device-placement logging enabled.
# NOTE(review): this name matches the standard-library `logging` module; importing
# that module later in the notebook would overwrite this flag.
logging = False

# Seed NumPy's global random generator.
# NOTE(review): only NumPy is seeded here; the Keras backend's own random
# generator is presumably left unseeded — confirm if reproducible runs matter.
np.random.seed(42)

# use Keras to import pre-shuffled MNIST database
# NOTE: Hold out one test set for final evaluation
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
import tensorflow as tf
from keras import backend as K

# NOTE: View in the jupyter console
# When the `logging` flag is set, create a TensorFlow session configured with
# log_device_placement=True and register it as the Keras backend session.
# NOTE(review): tf.Session / tf.ConfigProto look like TensorFlow 1.x-style
# entry points — confirm the installed TensorFlow version still exposes them.
if logging:
    session = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    K.set_session(session)

### 2. Rescale the Images by Dividing Every Pixel in Every Image by 255

In [None]:
# Convert both image arrays to float32 and map pixel values from [0,255] to [0,1].
# NOTE: the arrays are rebound to their own rescaled versions, so running this
# cell a second time divides the already-rescaled data by 255 again.
x_train, x_test = (images.astype('float32')/255 for images in (x_train, x_test))

### 3. Encode Categorical Integer Labels Using a One-Hot Scheme

In [None]:
from keras.utils import np_utils
# One-hot encode the training and test labels, requesting 10 classes for each.
y_ohe_train, y_ohe_test = (np_utils.to_categorical(labels, 10) for labels in (y_train, y_test))

### 4. Define the Model Architecture Function

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

# x_train's shape without its first axis; create_model passes this as the
# input_shape of its Flatten layer.
shape = x_train.shape[1:]

def create_model(dropout_rate=0.2, hidden_layers=2, nodes=512, optimizer="rmsprop"):
    """Build and compile a fully connected softmax classifier.

    Adapted from the tutorial at
    https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

    The input shape is taken from the notebook-level ``shape`` variable.

    Args:
        dropout_rate: Rate handed to the Dropout layer that follows every
            hidden Dense layer.
        hidden_layers: Number of (Dense, Dropout) pairs placed between the
            Flatten input layer and the output layer.
        nodes: Number of units in each hidden Dense layer.
        optimizer: Optimizer forwarded to ``model.compile``.

    Returns:
        The compiled Sequential model, ending in a 10-unit softmax layer.
    """
    print("...running create_model...")

    # Assemble the layer stack first: flatten -> hidden blocks -> softmax output
    layers = [Flatten(input_shape=shape)]
    for _ in range(hidden_layers):
        layers += [Dense(nodes, activation='relu'), Dropout(dropout_rate)]
    layers.append(Dense(10, activation='softmax'))

    model = Sequential(layers)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model

### 5. Create Helper Functions

In [None]:
import time
from datetime import timedelta

def timer(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapped function is called with the arguments it was given and its
    return value is passed through unchanged. After the call returns, the
    elapsed time is printed as a ``timedelta`` string followed by three blank
    lines. If ``func`` raises, the exception propagates and nothing is printed.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name, docstring and call signature behaviour
        as ``func``.
    """
    # Imported locally so this cell does not rely on an import made elsewhere
    from functools import wraps

    @wraps(func)  # keep func.__name__ / __doc__ instead of exposing "wrapper"
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments the way time.time() differences are
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = timedelta(seconds=time.perf_counter() - start)
        print("Elapsed time: {}\n\n\n".format(str(elapsed)))
        return result

    return wrapper

In [None]:
def plot_scan(scan_dict, train_scores, test_scores):
    """Draw a validation curve for one scanned hyperparameter.

    Based on
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_validation_curve.html#sphx-glr-auto-examples-model-selection-plot-validation-curve-py

    For both score arrays the mean over axis 1 is drawn as a line and a band
    of one standard deviation (also over axis 1) around it is shaded.

    Args:
        scan_dict: Mapping providing ``"scan_param"`` (used as the x-axis
            label) and ``"param_range"`` (used as the x values).
        train_scores: 2-D array of training scores; its rows are expected to
            line up with ``scan_dict["param_range"]``.
        test_scores: 2-D array of cross-validation scores with the same layout.
    """
    fig, ax = plt.subplots()

    ax.set_title("Validation curve")
    ax.set_xlabel(scan_dict["scan_param"])
    ax.set_ylabel("Score")

    x_values = scan_dict["param_range"]
    curves = ((train_scores, "Training score"),
              (test_scores, "Cross-validation score"))

    # Training curve first, then cross-validation, each as line + shaded band
    for scores, label in curves:
        mean = np.mean(scores, axis=1)
        std = np.std(scores, axis=1)
        ax.plot(x_values, mean, label=label)
        ax.fill_between(x_values, mean - std, mean + std, alpha=0.2)

    ax.legend(loc="best")

In [None]:
from sklearn.model_selection import validation_curve

@timer
def perform_scan(x_train, y_train, scan_dict, model):
    """Run a 3-fold validation-curve scan over one hyperparameter and plot it.

    Args:
        x_train: Training inputs handed to ``validation_curve``.
        y_train: Training targets, either 1-D class labels or a 2-D one-hot
            matrix. One-hot targets are collapsed to class indices before
            scoring (see the comment below).
        scan_dict: Mapping providing ``"param_name"`` (the estimator parameter
            to vary), ``"param_range"`` (the values to try) and the keys
            ``plot_scan`` reads.
        model: Estimator passed to ``validation_curve``.
    """
    print("Scanning {}".format(scan_dict["param_name"]))

    # scoring="accuracy" compares the estimator's predict() output with the
    # targets. The Keras classifier wrapper predicts 1-D class indices, which
    # scikit-learn's accuracy metric refuses to compare against a one-hot
    # (multilabel-indicator) matrix, so collapse one-hot rows to class indices.
    # NOTE(review): this assumes a 2-D target with more than one column is a
    # one-hot encoding, not a genuine multilabel target.
    labels = np.asarray(y_train)
    if labels.ndim == 2 and labels.shape[1] > 1:
        labels = np.argmax(labels, axis=1)

    train_scores, test_scores =\
        validation_curve(model,
                         x_train,
                         labels,
                         param_name=scan_dict["param_name"],
                         param_range=scan_dict["param_range"],
                         cv=3,
                         scoring="accuracy")

    plot_scan(scan_dict, train_scores, test_scores)

### 6. Generate the validation curves

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier

# Wrap create_model in the scikit-learn compatible Keras classifier
model = KerasClassifier(build_fn=create_model, verbose=0)

# NOTE: 1/3 of the calculations are being done again every time here
# Each entry names the parameter to vary, its axis label and the values to try
scan_dicts = [
    {"param_name": "dropout_rate", "scan_param": "Dropout rate", "param_range": [0.0, 0.2, 0.4]},
    {"param_name": "batch_size", "scan_param": "Batch size", "param_range": [64, 128, 256]},
    {"param_name": "epochs", "scan_param": "Epochs", "param_range": [5, 10, 20]},
    {"param_name": "hidden_layers", "scan_param": "Hidden layers", "param_range": [1, 2, 3]},
    {"param_name": "nodes", "scan_param": "Nodes", "param_range": [256, 512, 1024]},
    {"param_name": "optimizer", "scan_param": "Optimizer", "param_range": ["sgd", "rmsprop", "adadelta"]},
]

# One validation-curve scan (and plot) per entry
for scan_dict in scan_dicts:
    perform_scan(x_train, y_ohe_train, scan_dict, model)

### 7. Create the "ultimate" model

In [None]:
from keras.callbacks import ModelCheckpoint
# We do so by combining the maxima in the created curves
# NOTE: This is not very scientific

# NOTE(review): `model` referred to the KerasClassifier wrapper in the scan cell;
# from here on it is rebound to the model returned by create_model.
model = create_model(dropout_rate=0.4, hidden_layers=2, nodes=512, optimizer="rmsprop")

# train the model
# The checkpoint callback writes to 'mnist.model.best.hdf5' and, with
# save_best_only=True, keeps only the best result seen so far.
# NOTE(review): no `monitor` argument is passed, so "best" is judged by the
# callback's default metric — presumably validation loss; confirm.
checkpointer = ModelCheckpoint(filepath='mnist.model.best.hdf5', 
                               verbose=1, save_best_only=True)

# validation_split=0.2 sets aside a fifth of the training data for validation;
# the value returned by fit is deliberately discarded into `_`.
_ = model.fit(x_train, y_ohe_train, batch_size=64, epochs=20,
              validation_split=0.2, callbacks=[checkpointer],
              verbose=1, shuffle=True)

### 10. Load the Model Weights with the Lowest Validation Loss

In [None]:
# load the weights written by the checkpoint callback during training
# NOTE(review): the callback was created without a `monitor` argument, so these
# are the "best" weights by its default metric (presumably validation loss
# and not validation accuracy) — confirm.
model.load_weights('mnist.model.best.hdf5')

### 11. Calculate the Classification Accuracy on the Test Set

In [None]:
# evaluate test accuracy
# score[1] is read as the accuracy; the model was compiled with
# metrics=['accuracy'], so evaluate presumably returns [loss, accuracy] — confirm.
score = model.evaluate(x_test, y_ohe_test, verbose=0)
# convert the fraction to a percentage
accuracy = 100*score[1]

# print test accuracy
print('Test accuracy: {:.4f} %'.format(accuracy))