# Bring in modules

In [29]:
import tensorflow as tf
import sklearn
import numpy as np

# Print the version of each library so the environment behind the outputs
# below is visible in the notebook itself.
print(
    f"tensorflow version {tf.__version__}",
    f"sklearn version {sklearn.__version__}",
    f"numpy version {np.__version__}",
    sep="\n",
)

tensorflow version 2.7.0
sklearn version 1.0.1
numpy version 1.20.1


# Configs

In [31]:
# With numpy, when a value is printed display more values per line
np.set_printoptions(linewidth=5000)

# Stretch the notebook container to the full width of the browser window
# https://stackoverflow.com/questions/21971449/how-do-i-increase-the-cell-width-of-the-jupyter-ipython-notebook-in-my-browser
# `display` and `HTML` come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# In Pandas, display more rows and columns
# https://stackoverflow.com/a/11711637/4375369
# pd.set_option('display.max_rows', 100)
# pd.set_option('display.max_columns', 100)

# Seed passed to the data shuffles and to tf.random.set_seed further down
RANDOM_SEED_FOR_REPRODUCIBILITY = 777

# Get raw data

In [17]:
# Load MNIST through Keras; the call returns a (train, test) pair, each of
# which is an (examples, labels) pair, unpacked here into four raw arrays.
(X_train_raw, y_train_raw), (X_test_raw, y_test_raw) = tf.keras.datasets.mnist.load_data()

# Uncomment to inspect the four raw arrays:
# X_train_raw, y_train_raw, X_test_raw, y_test_raw

# Shuffle raw data

In [24]:
# https://scikit-learn.org/stable/modules/generated/sklearn.utils.shuffle.html

# Examples and labels are shuffled together in one call so each image keeps
# its label; the fixed random_state makes the permutation repeatable.
# NOTE: this cell overwrites its own inputs, so running it twice shuffles the
# already-shuffled data again.
X_train_raw, y_train_raw = sklearn.utils.shuffle(
    X_train_raw,
    y_train_raw,
    random_state=RANDOM_SEED_FOR_REPRODUCIBILITY,
)

X_test_raw, y_test_raw = sklearn.utils.shuffle(
    X_test_raw,
    y_test_raw,
    random_state=RANDOM_SEED_FOR_REPRODUCIBILITY,
)

# Normalize example data

In [25]:
# https://www.tensorflow.org/api_docs/python/tf/math/reduce_max

# The largest value found in the training examples is used as the divisor.
maximum_value = tf.math.reduce_max(X_train_raw)

# Guard against unexpected data before dividing by it.
assert maximum_value == 255, (
    "Maximum value is expected to be 255 but got {}".format(maximum_value)
)

# Train and test are both divided by the same training-set maximum.
X_train_normalized, X_test_normalized = (
    X_train_raw / maximum_value,
    X_test_raw / maximum_value,
)

# One-hot encode label data

In [26]:
# https://www.tensorflow.org/api_docs/python/tf/keras/utils/to_categorical

# MNIST labels are the digits 0-9. Passing the class count explicitly keeps
# the train and test encodings the same width; without it, to_categorical
# infers the width from the largest label present in each array separately.
NUMBER_OF_CLASSES = 10

y_train_one_hot_encoded = tf.keras.utils.to_categorical(y_train_raw, num_classes=NUMBER_OF_CLASSES)

y_test_one_hot_encoded = tf.keras.utils.to_categorical(y_test_raw, num_classes=NUMBER_OF_CLASSES)

# Compare Raw to Modified

In [34]:
# Show the first training example before and after normalization
X_train_raw[0], X_train_normalized[0]

(array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  80, 167, 167, 249, 175, 167, 136,  62,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   8, 137, 222, 254, 254, 254, 254, 2

In [35]:
# Show the first test example before and after normalization
X_test_raw[0], X_test_normalized[0]

(array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   2,   7,  50, 238, 155,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 142, 254, 254, 254, 154,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  48, 231, 254, 254, 177, 152,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  50, 229, 254, 247, 187,  

In [36]:
# Show the first training label next to its one-hot encoding
y_train_raw[0], y_train_one_hot_encoded[0]

(8, array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0.], dtype=float32))

In [37]:
# Show the first test label next to its one-hot encoding
y_test_raw[0], y_test_one_hot_encoded[0]

(6, array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.], dtype=float32))

# Expand dimensions of example data to be used by convolutional network

In [43]:
# https://www.tensorflow.org/api_docs/python/tf/expand_dims

# Append one trailing axis to each normalized example set.
X_train_expand_dims, X_test_expand_dims = (
    tf.expand_dims(examples, axis=-1)
    for examples in (X_train_normalized, X_test_normalized)
)

# Number of dimensions before and after expanding, train first and then test.
tuple(
    examples.ndim
    for examples in (
        X_train_normalized,
        X_train_expand_dims,
        X_test_normalized,
        X_test_expand_dims,
    )
)

(3, 4, 3, 4)

# Accept the data for use

In [44]:
# Bind the fully prepared arrays to the short names used for training and
# evaluation from here on.
X_train, y_train = X_train_expand_dims, y_train_one_hot_encoded
X_test, y_test = X_test_expand_dims, y_test_one_hot_encoded

# Create the model architecture and train

In [54]:
tf.random.set_seed(RANDOM_SEED_FOR_REPRODUCIBILITY)


def learning_rate_schedule(epoch, current_learning_rate):
    """Return the learning rate to use for the given epoch.

    Placeholder schedule: the incoming rate is handed back unchanged, so the
    optimizer keeps the rate it was compiled with.

    Args:
        epoch: Index of the epoch (unused).
        current_learning_rate: The learning rate currently in effect.

    Returns:
        ``current_learning_rate``, unmodified.
    """
    return current_learning_rate


# Three identical Conv2D -> MaxPooling2D stages, a Flatten, three identical
# hidden Dense layers, and a 10-way softmax output. The generators create the
# layers in the same order as writing them out one by one.
model_001 = tf.keras.Sequential([
    *(
        layer
        for _stage in range(3)
        for layer in (
            tf.keras.layers.Conv2D(100, (3, 3), padding="same", activation=tf.keras.activations.relu),
            tf.keras.layers.MaxPooling2D((2, 2)),
        )
    ),

    tf.keras.layers.Flatten(),

    *(
        tf.keras.layers.Dense(100, activation=tf.keras.activations.relu)
        for _hidden in range(3)
    ),

    tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax),
])

model_001.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Train for at most 20 epochs. EarlyStopping watches val_accuracy, gives up
# after 5 epochs without improvement, and restores the best weights seen.
model_001.fit(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_test, y_test),
    callbacks=[
        tf.keras.callbacks.LearningRateScheduler(learning_rate_schedule),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=5,
            restore_best_weights=True,
        ),
    ],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20


<keras.callbacks.History at 0x7fc64e58e400>

In [55]:
# Evaluate on the test split; the result is the loss followed by the
# "accuracy" metric configured in compile().
# NOTE(review): X_test/y_test were also passed as validation_data to fit() and
# drove EarlyStopping, so this score comes from data used to pick the weights.
model_001.evaluate(X_test, y_test)



[0.022048039361834526, 0.9944999814033508]

# Summary

- I was able to introduce callbacks for early stopping. It also restored the weights from the best epoch run after stopping
- I introduced a callback for learning rate but did not adjust it this time
- I sought a library method for normalizing the example data, but did not find one that would do it simply without reshaping the data. I stuck to dividing by the max for now.
- I'm starting to get more of a rhythm with this dataset
- I was able to successfully get the accuracy over 99%, with 99.76% on the train data and 99.45% on the test data. I didn't create a validation set in this round since the test data is