## An attempt to create a model for the MNIST dataset using TensorFlow 2.0 beta

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import matplotlib.pyplot as plt

### Data

In [None]:
# Fetch the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale both image arrays by 1/255 (presumably mapping 8-bit pixel
# intensities into [0, 1] -- confirm against the dataset's dtype).
x_train = x_train / 255.0
x_test = x_test / 255.0

### Baseline Model

In [None]:
# Baseline network: flattened 28x28 input -> one ReLU hidden layer of
# `hidden_layer_size` units -> 10-way softmax output.
hidden_layer_size = 100

baseline_model = tf.keras.models.Sequential()
baseline_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
baseline_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
baseline_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# The cross-entropy is tracked as a metric as well as the loss, so it is
# recorded in the training history under the key plot_history reads by default.
baseline_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', 'sparse_categorical_crossentropy'],
)
baseline_model.summary()

In [None]:
# `epoch_size` is the number of training epochs; later fit cells reuse it.
epoch_size = 10

# Fit the baseline, passing the test split as validation data so the
# history also records the 'val_*' series.
baseline_history = baseline_model.fit(
    x_train,
    y_train,
    epochs=epoch_size,
    validation_data=(x_test, y_test),
    verbose=2,
)

In [None]:
# Score the baseline model on the test split; kept as the cell's bare final
# expression so the notebook displays whatever evaluate() returns.
baseline_model.evaluate(x_test, y_test)

### Small model

In [None]:
# Smaller variant of the baseline: same layout, but only 10 hidden units.
hidden_layer_size = 10

small_model = tf.keras.models.Sequential()
small_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
small_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
small_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Same optimizer, loss and metrics as the baseline.
small_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', 'sparse_categorical_crossentropy'],
)
small_model.summary()

In [None]:
# Fit the small model for `epoch_size` epochs with the test split as
# validation data.
small_history = small_model.fit(
    x_train,
    y_train,
    epochs=epoch_size,
    validation_data=(x_test, y_test),
    verbose=2,
)

In [None]:
# Score the small model on the test split; kept as the cell's bare final
# expression so the notebook displays whatever evaluate() returns.
small_model.evaluate(x_test, y_test)

### Larger model

In [None]:
# Larger variant of the baseline: same layout, but 1000 hidden units.
hidden_layer_size = 1000

large_model = tf.keras.models.Sequential()
large_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
large_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
large_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Same optimizer, loss and metrics as the baseline.
large_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', 'sparse_categorical_crossentropy'],
)
large_model.summary()

In [None]:
# Fit the large model for `epoch_size` epochs with the test split as
# validation data.
large_history = large_model.fit(
    x_train,
    y_train,
    epochs=epoch_size,
    validation_data=(x_test, y_test),
    verbose=2,
)

In [None]:
# Score the large model on the test split; kept as the cell's bare final
# expression so the notebook displays whatever evaluate() returns.
large_model.evaluate(x_test, y_test)

### Plot training and validation losses

In [None]:
# Function adapted from: https://www.tensorflow.org/beta/tutorials/keras/overfit_and_underfit
def plot_history(histories, key='sparse_categorical_crossentropy'):
    """Plot training and validation curves of one metric for several runs.

    Args:
        histories: Iterable of ``(name, history)`` pairs. Each ``history``
            must expose an ``epoch`` sequence and a ``history`` mapping that
            contains both ``key`` and ``'val_' + key``.
        key: Name of the recorded metric to plot.

    Each run gets a dashed validation line and a solid training line in the
    same colour. The x-axis spans epoch 0 to the largest epoch index found in
    any of the runs, so runs of different lengths are all shown in full.
    """
    plt.figure(figsize=(16, 10))

    # Track the largest epoch index over *all* runs. Reading it from the loop
    # variable after the loop would only reflect the last run, and would fail
    # outright when `histories` is empty.
    last_epoch = 0
    for name, history in histories:
        val = plt.plot(history.epoch, history.history['val_' + key],
                       '--', label=name.title() + ' Val')
        # Reuse the validation line's colour so both curves of a run match.
        plt.plot(history.epoch, history.history[key],
                 color=val[0].get_color(),
                 label=name.title() + ' Train')
        last_epoch = max(last_epoch, max(history.epoch, default=0))

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_', ' ').title())
    plt.legend()

    # Only pin the x-range when it is non-degenerate; otherwise leave
    # matplotlib's automatic limits in place.
    if last_epoch > 0:
        plt.xlim([0, last_epoch])


# Compare the cross-entropy curves of the three hidden-layer sizes.
plot_history([
    ('baseline', baseline_history),
    ('small', small_history),
    ('large', large_history),
])

### L2 model

In [None]:
# Baseline layout with an L2 penalty (factor 0.001) on the hidden layer's
# kernel.
hidden_layer_size = 100

l2_model = tf.keras.models.Sequential()
l2_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
l2_model.add(tf.keras.layers.Dense(
    hidden_layer_size,
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
    activation='relu',
))
# Disabled experiment: a second, identically regularized hidden layer.
#l2_model.add(tf.keras.layers.Dense(hidden_layer_size, kernel_regularizer=tf.keras.regularizers.l2(0.001), activation='relu'))
l2_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Same optimizer, loss and metrics as the baseline.
l2_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', 'sparse_categorical_crossentropy'],
)
l2_model.summary()

In [None]:
# Fit the L2-regularized model for `epoch_size` epochs with the test split
# as validation data.
l2_history = l2_model.fit(
    x_train,
    y_train,
    epochs=epoch_size,
    validation_data=(x_test, y_test),
    verbose=2,
)

In [None]:
# Score the L2 model on the test split; kept as the cell's bare final
# expression so the notebook displays whatever evaluate() returns.
l2_model.evaluate(x_test, y_test)

### Dropout model

In [None]:
# Baseline layout with a Dropout layer (rate 0.1) after the hidden layer.
hidden_layer_size = 100

drop_model = tf.keras.models.Sequential()
drop_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
drop_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
drop_model.add(tf.keras.layers.Dropout(0.1))
# Disabled experiment: a second hidden layer followed by its own dropout.
#drop_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
#drop_model.add(tf.keras.layers.Dropout(0.1))
drop_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Same optimizer, loss and metrics as the baseline.
drop_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', 'sparse_categorical_crossentropy'],
)
drop_model.summary()

In [None]:
# Fit the dropout model for `epoch_size` epochs with the test split as
# validation data.
drop_history = drop_model.fit(
    x_train,
    y_train,
    epochs=epoch_size,
    validation_data=(x_test, y_test),
    verbose=2,
)

In [None]:
# Score the dropout model on the test split; kept as the cell's bare final
# expression so the notebook displays whatever evaluate() returns.
drop_model.evaluate(x_test, y_test)

In [None]:
# Compare the baseline against the L2 and dropout variants.
plot_history([
    ('baseline', baseline_history),
    ('L2', l2_history),
    ('dropout', drop_history),
])

### L2 and Dropout model

In [None]:
# Baseline layout combining both regularizers: an L2 kernel penalty (0.001)
# on the hidden layer, followed by Dropout (rate 0.1).
hidden_layer_size = 100

l2d_model = tf.keras.models.Sequential()
l2d_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
l2d_model.add(tf.keras.layers.Dense(
    hidden_layer_size,
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
    activation='relu',
))
l2d_model.add(tf.keras.layers.Dropout(0.1))
# Disabled experiment: a second regularized hidden layer with its own dropout.
#l2d_model.add(tf.keras.layers.Dense(hidden_layer_size, kernel_regularizer=tf.keras.regularizers.l2(0.001), activation='relu'))
#l2d_model.add(tf.keras.layers.Dropout(0.1))
l2d_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Same optimizer, loss and metrics as the baseline.
l2d_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', 'sparse_categorical_crossentropy'],
)
l2d_model.summary()

In [None]:
# Fit the L2 + dropout model for `epoch_size` epochs with the test split as
# validation data.
l2d_history = l2d_model.fit(
    x_train,
    y_train,
    epochs=epoch_size,
    validation_data=(x_test, y_test),
    verbose=2,
)

In [None]:
# Score the L2 + dropout model on the test split; kept as the cell's bare
# final expression so the notebook displays whatever evaluate() returns.
l2d_model.evaluate(x_test, y_test)

In [None]:
# Compare the baseline against the combined L2 + dropout model.
plot_history([
    ('baseline', baseline_history),
    ('L2 and dropout', l2d_history),
])

### Deep model

In [None]:
# Deeper variant: six ReLU hidden layers of `hidden_layer_size` units each
# between the flattened input and the 10-way softmax output.
hidden_layer_size = 100

deep_model = tf.keras.models.Sequential()
deep_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
deep_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
deep_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
deep_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
deep_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
deep_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
deep_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
deep_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Same optimizer, loss and metrics as the baseline.
deep_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', 'sparse_categorical_crossentropy'],
)
deep_model.summary()

In [None]:
# Re-assert the epoch count so this cell does not depend on the value set
# in an earlier cell.
epoch_size = 10

# Fit the deep model with the test split as validation data.
deep_history = deep_model.fit(
    x_train,
    y_train,
    epochs=epoch_size,
    validation_data=(x_test, y_test),
    verbose=2,
)

In [None]:
# Score the deep model on the test split; kept as the cell's bare final
# expression so the notebook displays whatever evaluate() returns.
deep_model.evaluate(x_test, y_test)

In [None]:
# Compare the baseline against the six-hidden-layer model.
plot_history([
    ('baseline', baseline_history),
    ('Deep', deep_history),
])

### Final model

In [None]:
# Final configuration: one ReLU hidden layer of 200 units followed by
# Dropout with rate 0.3.
hidden_layer_size = 200

final_model = tf.keras.models.Sequential()
final_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
final_model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
final_model.add(tf.keras.layers.Dropout(0.3))
final_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Same optimizer, loss and metrics as the baseline.
final_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', 'sparse_categorical_crossentropy'],
)
final_model.summary()

In [None]:
# Fit the final model for a fixed 5 epochs (not `epoch_size`) with the test
# split as validation data.
final_history = final_model.fit(
    x_train,
    y_train,
    epochs=5,
    validation_data=(x_test, y_test),
    verbose=2,
)

In [None]:
# Score the final model on the test split; kept as the cell's bare final
# expression so the notebook displays whatever evaluate() returns.
final_model.evaluate(x_test, y_test)

In [None]:
# Compare the baseline against the final model. The two runs differ in
# length: the final model is fit for 5 epochs, the baseline for `epoch_size`.
plot_history([
    ('baseline', baseline_history),
    ('Final', final_history),
])