# Lab 5: Neural networks

In this lab we will build dense neural networks on the MNIST dataset.

Make sure you read the tutorial for this lab first.

## Load the data and create train-test splits

In [None]:
!pip install openml
%matplotlib inline
import os

# TF_CPP_MIN_LOG_LEVEL has to be in the environment before TensorFlow is
# imported to reliably silence its C++ startup logging, so it is set here,
# ahead of the `tensorflow.keras` import below ("2" hides INFO and WARNING).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"

import numpy as np
import pandas as pd
import openml as oml
import matplotlib.pyplot as plt
import tensorflow.keras as keras

In [None]:
# Download MNIST data. Takes a while the first time.
mnist = oml.datasets.get_dataset(554)
X, y, _, _ = mnist.get_data(target=mnist.default_target_attribute, dataset_format='array')
# Turn every flat row into a 28x28 image; -1 lets NumPy infer the image count
# instead of hard-coding it.
X = X.reshape(-1, 28, 28)

# Take some random examples
from random import randint
fig, axes = plt.subplots(1, 5, figsize=(10, 5))
for ax in axes:
    # randint includes both endpoints, so the upper bound must be the last
    # valid index (len(X) - 1); using len(X) could raise an IndexError.
    n = randint(0, len(X) - 1)
    ax.imshow(X[n], cmap=plt.cm.gray_r)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel("{}".format(y[n]))
plt.show()

In [None]:
# For MNIST, there exists a predefined stratified train-test split of 60000-10000. We therefore don't shuffle or stratify here:
# the first 60000 rows become the training set and the remaining 10000 the test set.
# NOTE(review): this assumes the OpenML copy keeps the original MNIST row order (training rows first) - confirm.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=60000,
                                                    shuffle=False)

## Exercise 1: Preprocessing
* Normalize the data: map each feature value from its current representation (an integer between 0 and 255) to a floating-point value between 0 and 1.0.
* Store the floating-point values in `x_train_normalized` and `x_test_normalized`.
* Map the class label to a one-hot-encoded value. Store in `y_train_encoded` and `y_test_encoded`.

In [None]:
# prompt: Normalize the data: map each feature value from its current representation (an integer between 0 and 255) to a floating-point value between 0 and 1.0.

# Rescale the pixel values of both splits by dividing by 255.0.
x_train_normalized, x_test_normalized = (
    pixels / 255.0 for pixels in (X_train, X_test)
)


In [None]:


from tensorflow.keras.utils import to_categorical

# One-hot encode the class labels of the training and test splits.
y_train_encoded, y_test_encoded = map(to_categorical, (y_train, y_test))

## Exercise 2: Create a deep neural net model

Implement a `create_model` function which defines the topology of the deep neural net, specifying the following:

* The number of layers in the deep neural net: Use 2 dense layers for now.
* The number of nodes in each layer: these are parameters of your function.
* Any regularization layers. Add at least one dropout layer.
* The optimizer and learning rate. Make the learning rate a parameter of your function as well.

Consider:
* What should be the shape of the input layer?
* Which activation function you will need for the last layer, since this is a 10-class classification problem?

In [None]:
# prompt: ### Create and compile a 'deep' neural net
# def create_model(layer_1_units=32, layer_2_units=10, learning_rate=0.001, dropout_rate=0.3): with hidden layer

def create_model(layer_1_units=32, layer_2_units=10, learning_rate=0.001, dropout_rate=0.3):
    """Build and compile a dense softmax classifier for 28x28 images.

    The network flattens each 28x28 input, applies one ReLU dense layer
    followed by dropout, and ends in a softmax dense layer.

    Args:
        layer_1_units: Number of units in the hidden ReLU layer.
        layer_2_units: Number of units in the final softmax layer. This is
            the output layer, so it has to match the number of classes in
            the one-hot encoded labels (10 for the MNIST digits).
        learning_rate: Learning rate handed to the Adam optimizer.
        dropout_rate: Rate passed to the dropout layer that sits between
            the two dense layers.

    Returns:
        The compiled `keras.Sequential` model, using categorical
        cross-entropy as loss and accuracy as its only metric.
    """
    model = keras.Sequential([
        # An explicit Input object replaces the `input_shape=` layer
        # argument, which newer Keras versions warn about.
        keras.Input(shape=(28, 28)),
        keras.layers.Flatten(),
        keras.layers.Dense(layer_1_units, activation='relu'),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(layer_2_units, activation='softmax'),
    ])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


## Exercise 3: Create a training function
Implement a `train_model` function which trains and evaluates a given model.
It should do a train-validation split and report the train and validation loss and accuracy, and return the training history.

In [None]:
# prompt: Implement a train_model function which trains and evaluates a given model. It should do a train-validation split and report the train and validation loss and accuracy, and return the training history.

def train_model(model, x, y, epochs=10, batch_size=100, validation_split=0.2):
    """Fit `model` on `x`/`y` and hand back whatever `model.fit` returns.

    Args:
        model: Object exposing a Keras-style `fit` method.
        x: Training inputs, forwarded to `fit` unchanged.
        y: Training targets, forwarded to `fit` unchanged.
        epochs: Number of epochs to train for.
        batch_size: Batch size forwarded to `fit`.
        validation_split: Validation fraction forwarded to `fit`.

    Returns:
        The return value of `model.fit` (the training history for Keras
        models).
    """
    fit_options = {
        "epochs": epochs,
        "batch_size": batch_size,
        "validation_split": validation_split,
    }
    return model.fit(x, y, **fit_options)

# Baseline run: train the model with `create_model` and `train_model`.
#
# Model settings (spelled out explicitly; they equal the function defaults):
#     * 2 dense layers: 32 units in the first, 10 units in the second
#     * dropout rate 0.3
#     * learning rate 0.001
# Training settings: 10 epochs, batch size 100, validation split 0.2.
# The returned training history is stored in `history`.
model = create_model(
    layer_1_units=32,
    layer_2_units=10,
    learning_rate=0.001,
    dropout_rate=0.3,
)
history = train_model(
    model,
    x_train_normalized,
    y_train_encoded,
    epochs=10,
    batch_size=100,
    validation_split=0.2,
)

# Evaluate the trained model on the test set with `model.evaluate`.
model.evaluate(x_test_normalized, y_test_encoded)


## Exercise 4: Evaluate the model

Train the model with a learning rate of 0.003, 50 epochs, batch size 4000, and a validation set that is 20% of the total training data.
Use default settings otherwise. Plot the learning curve of the loss, validation loss, accuracy, and validation accuracy. Finally, report the performance on the test set.

Feel free to use the plotting function below, or implement the callback from the tutorial to see results in real time.

In [None]:
# Helper plotting function
#
# history: the history object returned by the fit function
# list_of_metrics: the metrics to plot
def plot_curve(history, list_of_metrics):
    """Plot the requested metrics of a training run against the epoch.

    Args:
        history: Object with an `epoch` sequence and a `history` mapping of
            metric name to per-epoch values (as returned by `fit`).
        list_of_metrics: Names of the metrics to draw. Names that are not
            present in `history.history` are skipped.
    """
    plt.figure()
    plt.xlabel("Epoch")
    plt.ylabel("Value")

    recorded = pd.DataFrame(history.history)
    # The first epoch is left out of every curve.
    epoch_axis = history.epoch[1:]

    for metric in (name for name in list_of_metrics if name in recorded.columns):
        plt.plot(epoch_axis, recorded[metric][1:], label=metric, lw=2)

    plt.legend()

In [None]:
# Solution
# Settings: learning rate 0.003, 50 epochs, batch size 4000, and 20% of the
# training data held out for validation.

# Create the model; everything except the learning rate keeps its default.
model = create_model(learning_rate=0.003)

# Train on the normalized training set and keep the history for plotting.
train = train_model(
    model,
    x_train_normalized,
    y_train_encoded,
    epochs=50,
    batch_size=4000,
    validation_split=0.2,
)

# Evaluate against the test set.
model.evaluate(x_test_normalized, y_test_encoded)

In [None]:
# The model is compiled with `accuracy` as its only metric, so the history
# holds the loss and accuracy plus their `val_` counterparts. Names such as
# recall, precision or f1 are not recorded and would be skipped by plot_curve.
list_of_metrics = ['loss', 'val_loss', 'accuracy', 'val_accuracy']
plot_curve(train, list_of_metrics=list_of_metrics)

## Exercise 5: Optimize the model

Try to optimize the model, either manually or with a tuning method. At least optimize the following:
* the number of hidden layers
* the number of nodes in each layer
* the number of dropout layers and the dropout rate

Try to reach at least 96% accuracy against the test set.

In [None]:
# Solution
# For an example with random search, see the tutorial.
# Here we search manually, guided by the following hunches:
#   * Adding more nodes to the first hidden layer will improve accuracy.
#     The input size is 784, so that layer should not be too small.
#   * Adding a second hidden layer generally improves accuracy.
#   * Larger models (more nodes) need more regularization (more dropout).
# This run only changes the first layer's width and the dropout rate.

# Create the model: a wider first layer combined with stronger dropout.
model = create_model(layer_1_units=256, dropout_rate=0.7)

# Train the model on the normalized training set.
train = train_model(
    model,
    x_train_normalized,
    y_train_encoded,
    epochs=50,
    batch_size=4000,
    validation_split=0.2,
)

# Plot a graph of the metrics vs. epochs.
plot_curve(train, list_of_metrics=list_of_metrics)

# Evaluate against the test set.
model.evaluate(x_test_normalized, y_test_encoded)

In [None]:
!pip install keras-tuner
import keras_tuner as kt

def model_builder(hp):
    """Build a compiled dense classifier from a KerasTuner search space.

    Search space:
        * `num_layers`: 1 to 3 hidden blocks (dense ReLU layer + dropout).
        * `units_<i>`: width of hidden layer `i`, 32 to 256 in steps of 32.
        * `dropout_<i>`: dropout rate after hidden layer `i`, 0.0 to 0.9 in
          steps of 0.1.
        * `learning_rate`: one of 1e-2, 1e-3, 1e-4 for the Adam optimizer.

    Args:
        hp: The KerasTuner hyperparameter container to sample from.

    Returns:
        The compiled `keras.Sequential` model with a 10-unit softmax output,
        categorical cross-entropy loss and accuracy as metric.
    """
    model = keras.Sequential()
    # An explicit Input object replaces the `input_shape=` layer argument,
    # which newer Keras versions warn about.
    model.add(keras.Input(shape=(28, 28)))
    model.add(keras.layers.Flatten())
    for i in range(hp.Int('num_layers', 1, 3)):
        # Hyperparameter names are made unique per layer: reusing a single
        # name would make the tuner give every layer the same value instead
        # of tuning the width and dropout of each layer separately.
        model.add(keras.layers.Dense(
            units=hp.Int('units_{}'.format(i), min_value=32, max_value=256, step=32),
            activation='relu'))
        model.add(keras.layers.Dropout(
            rate=hp.Float('dropout_{}'.format(i), min_value=0.0, max_value=0.9, step=0.1)))
    model.add(keras.layers.Dense(10, activation='softmax'))
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Hyperband search over `model_builder`, ranking trials by validation accuracy.
# The tuner is pointed at the `my_dir` directory.
tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy',
    max_epochs=10,
    directory='my_dir',
)
tuner.search(
    x_train_normalized,
    y_train_encoded,
    epochs=10,
    validation_split=0.2,
    verbose=2,
)

# Take the single best model found and score it on the test set.
best_model = tuner.get_best_models(num_models=1)[0]
best_model.evaluate(x_test_normalized, y_test_encoded)
