### Pete Champlin
### Big Data 230 A
#### Week 7 Assignment
6/6/2020

In [2]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K


# Use TensorFlow Backend
import random

import numpy as np
import tensorflow as tf

# For reproducibility: seeding TensorFlow alone is not sufficient. The Keras
# reproducibility FAQ recommends seeding Python's `random` and NumPy as well,
# so all three generators are fixed here before any model is built.
random.seed(42)
np.random.seed(42)
tf.set_random_seed(42)

# Print out Keras version
print(keras.__version__)

In [3]:
# Configure MLflow Experiment
#mlflow_experiment_id = 2102416

# Including MLflow
import mlflow
import mlflow.keras
import os
print("MLflow Version: %s" % mlflow.__version__)

In [4]:
import warnings
warnings.filterwarnings("ignore")

#### Prepare training and test data

In [6]:
# -----------------------------------------------------------
# Dataset configuration

# Number of target classes (one per digit)
num_classes = 10

# Height and width of every input image, in pixels
img_rows, img_cols = 28, 28

# -----------------------------------------------------------
# Load MNIST, already split into train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Put the single grayscale channel where the active Keras backend expects it
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Add the channel axis, cast to float32, and divide the pixel values by 255
x_train = x_train.reshape((x_train.shape[0],) + input_shape).astype('float32') / 255
x_test = x_test.reshape((x_test.shape[0],) + input_shape).astype('float32') / 255

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer class labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

#### Define parameterized CNN function

In [8]:
def _make_optimizer(choose_optimizer, lr):
  """Return the Keras optimizer named by `choose_optimizer`, using learning rate `lr`.

  Supported names: 'adadelta', 'sgd', 'nag', 'rmsprop', 'adam'.

  Raises:
    ValueError: if `choose_optimizer` is not one of the supported names.
  """
  if choose_optimizer == 'adadelta':
    return keras.optimizers.Adadelta(lr=lr, rho=0.95, epsilon=None, decay=0.0)
  if choose_optimizer == 'sgd':
    return keras.optimizers.SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
  if choose_optimizer == 'nag':
    # Nesterov acceleration only has an effect with non-zero momentum; with
    # momentum=0.0 this optimizer would be identical to plain 'sgd'.
    return keras.optimizers.SGD(lr=lr, momentum=0.9, decay=0.0, nesterov=True)
  if choose_optimizer == 'rmsprop':
    return keras.optimizers.RMSprop(lr=lr, rho=0.95, epsilon=None, decay=0.0)
  if choose_optimizer == 'adam':
    return keras.optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
  raise ValueError(
      "Unsupported optimizer %r; expected one of "
      "'adadelta', 'sgd', 'nag', 'rmsprop', 'adam'" % (choose_optimizer,))


def _build_cnn(activation):
  """Assemble the (uncompiled) CNN; reads the notebook globals `input_shape` and `num_classes`."""
  # Documentation: https://keras.io/guides/sequential_model/
  model = Sequential()

  # First convolution layer: 32 filters with a 3x3 window.
  # input_shape is only needed on the first layer of a Sequential model.
  model.add(Conv2D(32,
                   kernel_size=(3, 3),
                   activation=activation,
                   input_shape=input_shape))

  # Second convolution layer: 64 filters with a 3x3 window.
  # With Keras' default 'valid' padding, a 28x28 input is 26x26 after the
  # first convolution and 24x24 after this one.
  model.add(Conv2D(64,
                   kernel_size=(3, 3),
                   activation=activation))

  # Max pooling over 2x2 windows. With 'valid' padding the output size per
  # spatial dimension is floor((input - pool_size) / strides) + 1, so the
  # 24x24 feature maps become 12x12.
  model.add(MaxPooling2D(pool_size=(2, 2)))

  # Randomly drop 25% of the units while training (regularization against overfitting)
  model.add(Dropout(0.25))

  # Flatten the feature maps into one vector per sample
  model.add(Flatten())

  # Fully connected hidden layer with 128 units
  model.add(Dense(128, activation=activation))

  # Randomly drop 50% of the units while training (regularization against overfitting)
  model.add(Dropout(0.5))

  # Output layer: one softmax unit per class
  model.add(Dense(num_classes, activation='softmax'))
  return model


def runCNN(activation, choose_optimizer, lr, epochs, batch_size, verbose = 0):
  """Train and evaluate the CNN once, recording the run in MLflow.

  Relies on the notebook globals `x_train`, `y_train`, `x_test`, `y_test`,
  `input_shape` and `num_classes` prepared in an earlier cell.

  Args:
    activation: activation name used by the convolution and hidden dense layers.
    choose_optimizer: one of 'adadelta', 'sgd', 'nag', 'rmsprop', 'adam'.
    lr: learning rate handed to the optimizer.
    epochs: number of passes over the training data.
    batch_size: number of samples per gradient update.
    verbose: verbosity forwarded to `model.fit`.

  Returns:
    The result of `model.evaluate` on the test set: [test loss, test accuracy].

  Raises:
    ValueError: if `choose_optimizer` is not a supported name.
  """
  # Validate the optimizer choice first so a typo fails immediately instead of
  # surfacing later as an unbound-variable error.
  optimizer = _make_optimizer(choose_optimizer, lr)

  model = _build_cnn(activation)

  # Log MLflow
  #with mlflow.start_run(experiment_id = mlflow_experiment_id):
  with mlflow.start_run():

    # Log every hyperparameter up front so the run is identifiable even if
    # training fails part-way through.
    mlflow.log_param("activation function", activation)
    mlflow.log_param("optimizer", choose_optimizer)
    mlflow.log_param("learning rate", lr)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch size", batch_size)

    # Loss function (crossentropy) and Optimizer
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=optimizer,
                  metrics=['accuracy'])

    # Fit our model; the test set is passed as validation data
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=verbose,
              validation_data=(x_test, y_test))

    #model.summary()

    # Evaluate our model
    score = model.evaluate(x_test, y_test, verbose=0)

    # Log Metrics
    mlflow.log_metric("test loss", score[0])
    mlflow.log_metric("test accuracy", score[1])

    # Log Model
    mlflow.keras.log_model(model, "model")

  return score

#### Run models with various optimizers and learning rates

In [10]:
# REMINDER: input_shape = (28, 28, 1), training sample size = 60000, and test sample size = 10000

warnings.filterwarnings("ignore")

# Settings held constant across the whole sweep
activation = 'relu'  # alternatives: 'sigmoid', 'tanh'
batch_size = 128     # training samples processed per parameter update
epochs = 1           # complete passes through the training dataset

# Grid to explore: every optimizer is paired with every learning rate
choose_optimizers = ['adadelta', 'sgd', 'nag', 'rmsprop', 'adam']
learning_rates = [10.0, 1.0, 0.1, 0.01, 0.001]

# Walk the grid optimizer by optimizer, training one model per combination
for choose_optimizer, learning_rate in (
    (name, rate) for name in choose_optimizers for rate in learning_rates
):
  cnn_score = runCNN(activation, choose_optimizer, learning_rate, epochs, batch_size)
  print('choose_optimizer:', choose_optimizer)
  print('learning_rate:', learning_rate)
  print('Test loss:', cnn_score[0])
  print('Test accuracy:', cnn_score[1])
  print('')
     

With a small batch size (128) and a single epoch, the Adam and Adadelta optimizers achieved the highest accuracies, over 98%.

I'll re-run the three highest-performing configurations with a larger batch size (1000) and more epochs (3).

In [12]:
warnings.filterwarnings("ignore")

# Settings shared by all three follow-up runs
activation = 'relu'
batch_size = 1000
epochs = 3

# (optimizer, learning rate) pairs chosen for the follow-up runs
top_configs = [('adam', 0.001), ('adam', 0.01), ('adadelta', 1.0)]

for choose_optimizer, learning_rate in top_configs:
  cnn_score = runCNN(activation, choose_optimizer, learning_rate, epochs, batch_size)
  # Show each result here; otherwise every score is overwritten by the next
  # run and is only retrievable from MLflow.
  print('choose_optimizer:', choose_optimizer)
  print('learning_rate:', learning_rate)
  print('Test loss:', cnn_score[0])
  print('Test accuracy:', cnn_score[1])
  print('')


With a batch size of 1000 and 3 epochs, adam using a learning rate of 0.01 achieved 98.6% accuracy.

I will run that configuration again with a much larger batch size (20,000) and 10 epochs.

In [14]:
# Re-run the Adam / 0.01 configuration with a much larger batch and more epochs
activation = 'relu'
batch_size = 20000
epochs = 10
choose_optimizer = 'adam'
learning_rate = 0.01
cnn_score = runCNN(activation, choose_optimizer, learning_rate, epochs, batch_size)

# Display the result so the figures quoted in the notes can be read off the cell output
print('Test loss:', cnn_score[0])
print('Test accuracy:', cnn_score[1])

Test accuracy: 0.986

Test loss: 0.046

In [16]:
# Example single-epoch run that prints the test metrics.
# NOTE: the model.summary() call inside runCNN is commented out, so no
# layer summary is printed by this cell.

activation, choose_optimizer = 'relu', 'adam'
batch_size, epochs = 1000, 1
learning_rate = 0.01

cnn_score = runCNN(
    activation=activation,
    choose_optimizer=choose_optimizer,
    lr=learning_rate,
    epochs=epochs,
    batch_size=batch_size,
)
print('Test loss:', cnn_score[0])
print('Test accuracy:', cnn_score[1])