In [None]:
import tensorflow as tf 
import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import fashion_mnist, mnist
import datetime
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Input, Dropout, BatchNormalization
import matplotlib.pyplot as plt
# shell command: delete the logs/fit folder (if present) before the notebook starts
!rm -rf logs/fit

# Create networks with different shapes

Create a model with:
- one `Input` layer for data of shape (batch x 10), so 10 features per observation
- one hidden `Dense` layer of 30 units
- one `Dense` output layer, with an output of shape (batch x 1), so 1 number per observation.
- thus, a total of three layers

In [None]:
# your code here
# (define the `input` and `output` tensors that the Model call below wires together)

# build the functional-API model from those tensors and print an overview of its layers
model = Model(inputs=[input], outputs=[output])
model.summary()

In [None]:
# Checks for the single-input model defined above.
# the (only) input layer expects 10 features per observation
assert model._input_layers[0].input_shape[0][1] == 10
assert len(model._input_layers) == 1
# three layers in total
assert len(model.layers) == 3
# the layer at index 1 outputs 30 units
assert model.layers[1].output_shape[1] == 30
# a single output layer that produces one number per observation
assert len(model._output_layers) == 1
assert model._output_layers[0].output_shape[1] == 1

Create a model that:
- starts with two `Input` layers (`inputa` and `inputb`), both for data with shape (batch x 10)
- input `a` feeds into a `Dense` layer with 50 units
- the output of that Dense layer is concatenated with input `b`
- So, the concatenated layer should output a layer of shape (batch x 60), because we concatenate 50 + 10
- the concatenated layers are fed into a `Dense` layer with a single unit, which is the output of the model.
- we have a total of 5 layers (2 input, 1 Dense, 1 concat, 1 Dense)

In [None]:
# your code here

In [None]:
# Checks for the two-input model defined above.
assert len(model._input_layers) == 2
# Both inputs must take 10 features. These are two separate asserts on purpose:
# the earlier form `a == 10 & b == 10` was parsed as `a == (10 & b) == 10`
# (`&` binds tighter than `==`), which also accepted wrong sizes for the
# second input such as 11, 14 or 15.
assert model._input_layers[0].input_shape[0][1] == 10
assert model._input_layers[1].input_shape[0][1] == 10
# the layer at index 1 is the Dense layer with 50 units
assert model.layers[1].output_shape[1] == 50
# the layer at index 3 is the concatenation: 50 + 10 = 60
# NOTE(review): the extra [0] assumes output_shape is a list of shapes here; if the
# concatenate layer reports a plain tuple like (None, 60) this raises TypeError - confirm
assert model.layers[3].output_shape[0][1] == 60
# the final Dense layer outputs a single number
assert model.layers[4].output_shape[1] == 1

Now, add a double output to the above model. The output of the first `Dense` layer with 50 units should also be an output, together with the output of the second `Dense` layer with one unit.

In [None]:
# your code here

In [None]:
# Checks for the two-input, two-output model defined above.
assert len(model._input_layers) == 2
# Both inputs must take 10 features. These are two separate asserts on purpose:
# the earlier form `a == 10 & b == 10` was parsed as `a == (10 & b) == 10`
# (`&` binds tighter than `==`), which also accepted wrong sizes for the
# second input such as 11, 14 or 15.
assert model._input_layers[0].input_shape[0][1] == 10
assert model._input_layers[1].input_shape[0][1] == 10
# the layer at index 1 is the Dense layer with 50 units
assert model.layers[1].output_shape[1] == 50
# the layer at index 3 is the concatenation: 50 + 10 = 60
# NOTE(review): the extra [0] assumes output_shape is a list of shapes here; if the
# concatenate layer reports a plain tuple like (None, 60) this raises TypeError - confirm
assert model.layers[3].output_shape[0][1] == 60
# the final Dense layer outputs a single number
assert model.layers[4].output_shape[1] == 1
# the model now exposes two outputs
assert len(model._output_layers) == 2

# Create a model for MNIST
## prepare data

We load the mnist dataset and make a train-test split. Make sure the `random_state` and the split size do not change, because that would influence the `assert` tests later on.

Note that in a real life setting you should ONLY use `random_state` if you want to guarantee that the split comes out exactly the same every time you make it.

In [None]:
from sklearn.model_selection import train_test_split

# Load MNIST; swap in the commented line to use fashion-MNIST instead.
#(X_train, y_train), (X_valid, y_valid) = fashion_mnist.load_data()
(X_train, y_train), (X_valid, y_valid) = mnist.load_data()

# Split the second part of the dataset into a validation set and a test set (40%).
# test_size and random_state stay fixed because the assert cells further down
# depend on this exact split.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_valid,
    y_valid,
    test_size=0.4,
    random_state=42,
)

In [None]:
# shapes of the train / validation / test feature arrays
X_train.shape, X_valid.shape, X_test.shape

In [None]:
idx = 2 # index of the training example to inspect. You can change this to have a look at others
digit = X_train[idx]
# draw the selected image with the 'binary' colormap
plt.imshow(digit, cmap='binary')
# the cell output is the label that belongs to this image
y_train[idx]

In [None]:
# the y values are class labels (categories), ranging from 0 to 9.
y_train

In [None]:
# we scale the data, simply between [0,1]
# The arrays are overwritten under the same names, so this cell is guarded against
# being run twice: without the check a second run would divide by 255 again and
# silently shrink the data. After the first run the maximum is at most 1, so the
# block is skipped on re-runs.
if X_train.max() > 1:
    X_train = X_train / 255.
    X_valid = X_valid / 255.
    X_test = X_test / 255.

## Create baseline model

In [None]:
# first, we reshape
X_trainr = X_train.reshape(X_train.shape[0], -1)
X_testr = X_test.reshape(X_test.shape[0], -1)
X_trainr.shape

After reshaping, we have size (batch x features)

In [None]:
# useful for plotting heatmaps of a confusion matrix
import seaborn as sns
def cfm_heatmap(cfm, figsize = (8,8), scale = None, vmin=None, vmax=None):
    """
    Plot a confusion matrix as an annotated seaborn heatmap.

    cfm: 2D confusion matrix. Rows are labelled 'Target', columns 'Predicted'.
    figsize: tuple, default (8,8)
    scale: string or None. The direction over which the numbers are scaled.
        None      - plot the values as they are
        'total'   - divide every cell by the sum of all cells
        'rowwise' - divide every row by its row sum
        'colwise' - divide every column by its column sum
    vmin, vmax: passed on to seaborn.heatmap to fix the colour range.

    Returns the matplotlib Axes that holds the heatmap.
    Raises ValueError when `scale` is not one of the values listed above.
    """
    if scale is None:
        cfm_norm = cfm
    elif scale == 'total':
        cfm_norm = cfm / np.sum(cfm)
    elif scale == 'rowwise':
        # keepdims keeps the sums as a column so they broadcast over each row
        cfm_norm = cfm / np.sum(cfm, axis=1, keepdims=True)
    elif scale == 'colwise':
        cfm_norm = cfm / np.sum(cfm, axis=0, keepdims=True)
    else:
        # a misspelled option used to be ignored silently, producing an unscaled plot
        raise ValueError(
            "scale must be None, 'total', 'rowwise' or 'colwise', got {!r}".format(scale)
        )

    # draw on an explicit Axes instead of relying on the implicit current figure
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(cfm_norm, annot=cfm_norm, vmin=vmin, vmax=vmax, ax=ax)
    ax.set(xlabel='Predicted', ylabel='Target')
    return ax


Create a basic classifier. The simplest is a `SGDClassifier`. Make one, fit, predict and make a confusion matrix. Tip: speed it up with `n_jobs`.

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix

# your code here

And test the accuracy. This is the baseline you want to improve on.

In [None]:
from sklearn.metrics import accuracy_score
# accuracy of the baseline on the test set
# (`yhat` is expected to hold the predictions made in the exercise cell above)
accuracy_score(y_test, yhat)

In [None]:
# the baseline has to score above 0.9 on the test set
assert accuracy_score(y_test, yhat) > 0.9

# Create Deep Neural Network

Now create a Sequential model. Try some variations with amounts of layers and units. 
Experiment with the following things:

- different amounts of layers
- different amounts of units in every layer

In [None]:
# early stopping with a patience of 10 epochs; the best weights are restored afterwards
# NOTE(review): with patience=10 and epochs=10 in the fit call below, this callback
# presumably cannot end this particular run early - confirm that is intended
early_stop = EarlyStopping(patience=10, restore_best_weights=True)

model = Sequential([

    # your code here
    # (note: the model is fit on X_train, not on the flattened X_trainr)
])
# sparse categorical cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric
model.compile(loss="sparse_categorical_crossentropy", optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
# train with the validation split for monitoring; verbose = 0 suppresses the training log
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid), callbacks=[early_stop], verbose = 0)
# score the trained model on the test split
model.evaluate(X_test, y_test)

In [None]:
# you should be able to get above 96% with a bit of trying
assert model.evaluate(X_test, y_test)[1] > 0.96

Build a hypermodel.

Define search ranges for:
- the number of units per layer (at least between 128 and 320)
- the number of layers (at least between 1 and 6), added with a for-loop

In [None]:
def build_model(hp):
    """
    Build and compile one candidate model; passed to the tuner as its model-building function.

    hp: hyperparameter object supplied by the tuner. The exercise code is expected
        to use it to define the search ranges (units per layer, number of layers).

    Returns the compiled keras Model: an Input of shape [28,28], the layers added by
    the exercise code, and a 10-unit softmax output.
    """
    # NOTE: `input` shadows the Python builtin of the same name inside this function
    input = Input(shape = [28,28])
    
    # your code here
    # (must define `x`, the tensor that is fed into the output layer below)
    
    # 10-unit softmax output layer on top of `x`
    output = Dense(10, activation='softmax')(x)
    model = Model(inputs = [input], outputs = [output])
    # same loss / optimizer / metric as the Sequential model compiled earlier in the notebook
    model.compile(loss="sparse_categorical_crossentropy", optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
    return model

In [None]:
import kerastuner as kt
# cleaning up folders from old runs
!rm -rf ktuner/

# Hyperband tuner over the models produced by build_model, with 'val_loss' as the objective;
# directory / project_name point at the ktuner folder that is removed above
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=3,
    directory='ktuner',
    project_name='mnist'
)
# NOTE(review): the tuner is created with max_epochs=3 while search() is given epochs = 10;
# presumably Hyperband sets the epochs of every trial itself, which would make
# epochs = 10 a no-op - confirm against the keras-tuner version in use
tuner.search(X_train, y_train, epochs = 10, validation_data = (X_valid, y_valid), verbose=1)

After that, we obtain the best model, and fit it.

In [None]:
# Look up the best hyperparameter set found by the search and show its values.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)

# Take the best model from the tuner and inspect its architecture.
model = tuner.get_best_models()[0]
model.summary()

# Train it further for up to 100 epochs, reusing the early-stopping callback
# defined earlier in the notebook.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stop],
    verbose=0,
)

You should be able to reach an accuracy of 97.5%, and even get above 98%.

In [None]:
# the tuned model has to score above 0.975 on the test set (index 1 of the evaluate() result)
assert model.evaluate(X_test, y_test)[1] > 0.975