# Convolutional Neural Networks - MNIST Dataset - Parallel

> Joseph P. Vantassel, Texas Advanced Computing Center - The University of Texas at Austin


In [None]:
# If you have not done so previously.
!pip install -r requirements.txt

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

mpl.rcParams["font.size"]=8

## Import and Visualize Data

In [None]:
# Load MNIST through Keras; `data` is a (train, test) pair and each half is
# an (images, labels) pair.
data = keras.datasets.mnist.load_data()
train_pair, test_pair = data
x_train, y_train = train_pair
x_test, y_test = test_pair

In [None]:
# Print the shape of each array: training images, training labels,
# test images, test labels.
for array in (x_train, y_train, x_test, y_test):
    print(array.shape)

In [None]:
# Display the first three training images, each titled with its label.
for image, label in zip(x_train[:3], y_train[:3]):
    plt.imshow(image, cmap="binary_r")
    plt.title(f"y={label}")
    plt.show()

## Preprocess

### Image Preparation

- Calculate mean and std and store for later.
- Normalize image inputs.

In [None]:
# Mean and standard deviation over all training pixels; stored so the same
# values can be reused to normalize other inputs later.
x_mean = x_train.mean()
x_std = x_train.std()
print(f"mean={x_mean:.2f}, std={x_std:.2f}")

# Standardize the training images and print the resulting statistics as a check.
x_train_norm = (x_train - x_mean) / x_std
print(f"mean={np.mean(x_train_norm):.2f}, std={np.std(x_train_norm):.2f}")

### Output Preparation

- One-hot encoding

In [None]:
# One-hot encode the integer labels: row i gets a 1 in column y_train[i]
# and zeros everywhere else.
num_classes = 10
y_train_one_hot = np.zeros((y_train.size, num_classes), dtype=int)
# Integer-array indexing sets every (row, label) entry in a single vectorized
# assignment instead of looping over the labels one at a time in Python.
y_train_one_hot[np.arange(y_train.size), y_train] = 1

### Check

In [None]:
# Sanity check: show one normalized training image titled with its label,
# then print the label next to its one-hot row.
idx = 0
sample_image = x_train_norm[idx]
sample_label = y_train[idx]

plt.imshow(sample_image, cmap="binary_r")
plt.title(f"y={sample_label}")
plt.axis("off")
plt.show()

print(sample_label, y_train_one_hot[idx])

## Develop Network Architecture

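For parallel training, create a `tf.distribute.MirroredStrategy` and build the model inside its `mirrored_strategy.scope()` context so that the model's variables are mirrored across the GPUs.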

In [None]:
# With no explicit device list the strategy uses every device TensorFlow can
# see, rather than assuming that exactly four GPUs (GPU:0..GPU:3) exist.
mirrored_strategy = tf.distribute.MirroredStrategy()


def get_model():
    """Build the convolutional MNIST classifier inside the strategy scope.

    The network is four Conv2D layers followed by an average pool over the
    full remaining feature map and a flatten, so it emits one value per
    class (10 values) for each input image.

    Returns:
        The (uncompiled) `keras.Model` taking inputs of shape (28, 28, 1).
    """
    # Building the model under the scope lets the strategy manage its variables.
    with mirrored_strategy.scope():
        inputs = keras.Input(shape=(28, 28, 1))
        x = layers.Conv2D(32, kernel_size=3, strides=2, activation="relu")(inputs)
        x = layers.Conv2D(64, kernel_size=3, activation="relu")(x)
        x = layers.Conv2D(64, kernel_size=3, activation="relu")(x)
        # NOTE(review): the ReLU here makes the outputs non-negative, yet they
        # are later treated as logits by the loss -- confirm this is intended.
        x = layers.Conv2D(10, kernel_size=3, activation="relu")(x)
        # Pool over the entire spatial extent so each of the 10 channels
        # collapses to a single value.
        _, height, width, _ = x.shape
        x = layers.AveragePooling2D(pool_size=(height, width))(x)
        x = layers.Flatten()(x)
        model = keras.Model(inputs, x)
    return model


model = get_model()

### Define Loss Function

- Categorical Cross Entropy on Logits

In [None]:
# Categorical cross-entropy applied to the model's raw outputs (logits),
# with label smoothing disabled.
loss = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True,
    label_smoothing=0,
    reduction="auto",
)

In [None]:
# Stochastic gradient descent with momentum.
optimizer = tf.keras.optimizers.SGD(
    momentum=0.9,
    learning_rate=0.01,
)

In [None]:
# Attach the loss function and optimizer to the model.
model.compile(loss=loss, optimizer=optimizer)

In [None]:
# Print the model's layer-by-layer summary.
model.summary()

In [None]:
# Hyperparameters
batch_size = 64
epochs = 10
validation_split = 0.2

# batch_size must be passed explicitly; if it is omitted, fit() uses its own
# default and the value chosen above has no effect.
history = model.fit(
    x_train_norm,
    y_train_one_hot,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=validation_split,
)

In [None]:
# Plot the per-epoch training and validation loss recorded in `history`.
loss_curves = (
    ("loss", "black", "Training Data"),
    ("val_loss", "blue", "Validation Data"),
)
for history_key, curve_color, curve_label in loss_curves:
    plt.plot(
        history.epoch,
        history.history[history_key],
        color=curve_color,
        label=curve_label,
    )
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

### View Results

In [None]:
# Normalize the test images with the training mean/std, run the model, and
# convert its outputs to per-class probabilities with a softmax.
x_test_norm = (x_test - x_mean) / x_std
y_test_predicted = model.predict(x_test_norm)
softmax_layer = keras.layers.Softmax(axis=-1)
y_test_prob = softmax_layer(y_test_predicted)

In [None]:
# For the first five test images, show the digit next to a bar chart of the
# predicted class probabilities.
for idx in range(5):
    fig, axs = plt.subplots(ncols=2, figsize=(4, 1), dpi=150)

    # Left panel: the raw (un-normalized) test image.
    ax = axs[0]
    ax.imshow(x_test[idx], cmap="binary_r")
    ax.axis("off")

    # Right panel: predicted probabilities, with a green diamond placed at the
    # true label (drawn at height 0.5).
    ax = axs[1]
    ax.bar(np.arange(10), y_test_prob[idx])
    ax.plot(y_test[idx], 0.5, marker="d", color="lightgreen")
    ax.set_xticks(range(10))  # one tick per class; set once
    ax.set_ylim(0, 1)
    ax.set_xlabel("Classification")
    ax.set_ylabel("Probability")
    plt.show()

## Calculate Prediction Accuracy

In [None]:
# Predicted class = index of the largest probability; accuracy is the
# fraction of test labels that the prediction matches.
y_pred = np.argmax(y_test_prob, axis=-1)
is_correct = y_test == y_pred
accuracy = np.mean(is_correct)
print(f"Test Accuracy: {accuracy*100:.1f}%")

## Failed Predictions

In [None]:
# Indices of the test images whose predicted class differs from the true label.
idxs = np.argwhere(y_test != y_pred).flatten()

# Show at most `nidxs` misclassified images; slicing handles the case where
# there are fewer failures than that.
nidxs = 5
for idx in idxs[:nidxs]:
    fig, axs = plt.subplots(ncols=2, figsize=(4, 1), dpi=150)

    # Left panel: the raw (un-normalized) test image.
    ax = axs[0]
    ax.imshow(x_test[idx], cmap="binary_r")
    ax.axis("off")

    # Right panel: predicted probabilities, with a green diamond placed at the
    # true label (drawn at height 0.5).
    ax = axs[1]
    ax.bar(np.arange(10), y_test_prob[idx])
    ax.plot(y_test[idx], 0.5, marker="d", color="lightgreen")
    ax.set_xticks(range(10))
    ax.set_ylim(0, 1)
    ax.set_xlabel("Classification")
    ax.set_ylabel("Probability")
    plt.show()