In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
import time

import torch

# PyTorch
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Using {}.".format(device_name))

# TensorFlow
# tf.test.gpu_device_name() reports "no GPU" by returning an empty string
# rather than by raising, so the result has to be checked explicitly.
try:
    DEVICE_NAME = tf.test.gpu_device_name()
except Exception:
    # Best effort: treat any failure while probing for a GPU as "no GPU".
    DEVICE_NAME = ""

if DEVICE_NAME:
    print("Found GPU at: {}".format(DEVICE_NAME))
else:
    # Fall back to the CPU so later `tf.device(DEVICE_NAME)` calls get a valid device.
    DEVICE_NAME = "/device:CPU:0"
    print("ERROR: Not connected to a GPU runtime.")

Using cuda.
Found GPU at: /device:GPU:0


In [None]:
# Download the data using TensorFlow

def download_mnist_data(channels = None, categorize = False):
    """Load MNIST through Keras and return train/validation/test splits.

    Pixel values are scaled from [0, 255] to [0, 1] and 10000 images are
    split off the training set (fixed random_state = 42) for validation.

    Args:
        channels: Where to insert a singleton channel axis into the image arrays.
            None    -> images are returned without a channel axis,
            "NHWC"  -> channel axis appended last,
            "NCHW"  -> channel axis inserted right after the batch axis.
        categorize: If truthy, labels are one-hot encoded with
            tf.keras.utils.to_categorical; otherwise they are returned unchanged.

    Returns:
        Tuple (X_train, y_train, X_valid, y_valid, X_test, y_test).

    Raises:
        ValueError: If `channels` is not None, "NHWC" or "NCHW".
    """
    # Fail fast (before downloading) on an unknown layout. Silently ignoring it
    # would return images without a channel axis.
    if channels not in (None, "NHWC", "NCHW"):
        raise ValueError('channels must be None, "NHWC" or "NCHW", got {!r}.'.format(channels))

    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Normalize the pixel values from [0, 255] to [0, 1].
    X_train = X_train.astype(float) / 255
    X_test = X_test.astype(float) / 255

    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size = 10000, random_state = 42)
    print("Loaded {} train, {} validation and {} test images.".format(len(X_train), len(X_valid), len(X_test)))

    # Insert the singleton channel axis at the position the requested layout expects.
    if channels == "NHWC":
        X_train, X_valid, X_test = (np.expand_dims(images, axis = -1) for images in (X_train, X_valid, X_test))
    elif channels == "NCHW":
        X_train, X_valid, X_test = (np.expand_dims(images, axis = 1) for images in (X_train, X_valid, X_test))

    if categorize:
        y_train = tf.keras.utils.to_categorical(y_train)
        y_valid = tf.keras.utils.to_categorical(y_valid)
        y_test = tf.keras.utils.to_categorical(y_test)

    return X_train, y_train, X_valid, y_valid, X_test, y_test

# PyTorch

In [None]:
def make_torch_dataloader(X, y, batch_size = 20, shuffle = False):
    """Wrap NumPy features and labels in a batched PyTorch DataLoader.

    Args:
        X: NumPy array of features; converted to a float32 tensor.
        y: NumPy array of labels; converted to an int64 tensor.
        batch_size: Number of samples per batch.
        shuffle: Passed through to the DataLoader.

    Returns:
        A torch.utils.data.DataLoader over a TensorDataset of (features, labels).
    """
    features = torch.from_numpy(X).float()
    labels = torch.from_numpy(y).long()
    return torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(features, labels),
        batch_size = batch_size,
        shuffle = shuffle,
    )

def make_torch_dataloaders(X_train, y_train, X_valid, y_valid, X_test, y_test, batch_size = 20):
    """Build the train, validation and test DataLoaders in one call.

    Only the training loader is created with shuffle = True; the validation
    and test loaders are created with shuffle = False.

    Returns:
        Tuple (train loader, validation loader, test loader).
    """
    splits = (
        (X_train, y_train, True),
        (X_valid, y_valid, False),
        (X_test, y_test, False),
    )
    train_loader, valid_loader, test_loader = (
        make_torch_dataloader(features, labels, batch_size = batch_size, shuffle = reshuffle)
        for features, labels, reshuffle in splits
    )
    return train_loader, valid_loader, test_loader

# Data for the PyTorch model: images get a channel axis in "NCHW" position and
# labels stay as returned by the loader (categorize = False, i.e. not one-hot).
batch_size = 20
X_train, y_train, X_valid, y_valid, X_test, y_test = download_mnist_data(channels = "NCHW", categorize = False)
# Wrap the three splits in DataLoaders that all use the same batch size.
train_dl, valid_dl, test_dl = make_torch_dataloaders(X_train, y_train, X_valid, y_valid, X_test, y_test, batch_size = batch_size)

Loaded 50000 train, 10000 validation and 10000 test images.


In [None]:
class PyTorchMnistCNNClassifier(torch.nn.Module):
    """CNN classifier: two conv/ReLU/max-pool stages followed by two linear layers.

    The network returns the raw output of the last linear layer (10 values per
    sample); no softmax is applied inside the model.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Stage 1: [N, 1, 28, 28] -> [N, 16, 14, 14]
        self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 16, kernel_size = 3, padding = 1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size = 2)

        # Stage 2: [N, 16, 14, 14] -> [N, 32, 7, 7]
        self.conv2 = nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 3, padding = 1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size = 2)

        # Flatten: [N, 32, 7, 7] -> [N, 1568]
        self.flatten = nn.Flatten()

        # Hidden layer with dropout: [N, 1568] -> [N, 1024]
        self.linear1 = nn.Linear(in_features = 1568, out_features = 1024)
        self.relu3 = nn.ReLU()
        self.dropout = nn.Dropout(p = 0.5)

        # Output layer: [N, 1024] -> [N, 10]
        self.linear2 = nn.Linear(in_features = 1024, out_features = 10)

    def forward(self, x, training = False):
        """Run `x` through every layer in order and return the final layer's output.

        `training` is accepted but not used here: whether dropout is active is
        determined by the module's own train()/eval() mode.
        """
        pipeline = (
            self.conv1, self.relu1, self.pool1,
            self.conv2, self.relu2, self.pool2,
            self.flatten,
            self.linear1, self.relu3, self.dropout,
            self.linear2,
        )
        for layer in pipeline:
            x = layer(x)
        return x

In [None]:
def xy_to_device(x, y, device = "cpu"):
    """Return `x` and `y` moved to `device` (via their .to() method) as a tuple."""
    x_on_device = x.to(device)
    y_on_device = y.to(device)
    return x_on_device, y_on_device

def forward_pass(x, y, model, loss_fn):
    """Apply `model` to `x` and score the result against `y`.

    Returns:
        Tuple (model output, loss_fn(model output, y)).
    """
    outputs = model(x)
    return outputs, loss_fn(outputs, y)

def back_propagation(loss, optimizer):
    """Perform one optimisation step for `loss`.

    The order matters: stale gradients are cleared first, then fresh gradients
    are computed from `loss`, and only then does the optimizer apply them.
    """
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()        # compute gradients of the loss
    optimizer.step()       # update the weights using those gradients

def train_loop(model, loss_fn, optimizer, train_dl, metric_fn = None, device = "cpu"):
    """Train `model` for one pass over `train_dl`.

    Args:
        model: Module to train; put into training mode and moved to `device`.
        loss_fn: Callable (prediction, target) -> loss; the loss must support
            .item() and .backward().
        optimizer: Optimizer used for the weight update after every batch.
        train_dl: Iterable of (x, y) batches that also supports len().
        metric_fn: Optional callable (prediction, target) -> per-batch metric.
        device: Device the model and every batch are moved to.

    Returns:
        The mean per-batch loss, or a tuple (mean loss, mean metric) when
        `metric_fn` is given. Both means are taken over the number of batches.
    """
    track_metric = metric_fn is not None
    running_loss = 0
    running_metric = 0

    # Training mode first, then place the model on the target device.
    model.train()
    model.to(device)

    for inputs, targets in train_dl:
        inputs, targets = xy_to_device(inputs, targets, device)
        outputs, batch_loss = forward_pass(inputs, targets, model, loss_fn)
        back_propagation(batch_loss, optimizer)

        # Accumulate per-batch values; they are averaged over the batches below.
        running_loss = running_loss + batch_loss.item()
        if track_metric:
            running_metric = running_metric + metric_fn(outputs, targets)

    num_batches = len(train_dl)
    epoch_loss = running_loss / num_batches
    if not track_metric:
        return epoch_loss
    return epoch_loss, running_metric / num_batches

def evaluation_loop(model, loss_fn, valid_dl, metric_fn = None, device = "cpu"):
    """Evaluate `model` on every batch of `valid_dl` without updating weights.

    The model is switched to evaluation mode (and left in it), moved to
    `device`, and all forward passes run with gradient tracking disabled.

    Args:
        model: Module to evaluate.
        loss_fn: Callable (prediction, target) -> loss supporting .item().
        valid_dl: Iterable of (x, y) batches that also supports len().
        metric_fn: Optional callable (prediction, target) -> per-batch metric.
        device: Device the model and every batch are moved to.

    Returns:
        The mean per-batch loss, or a tuple (mean loss, mean metric) when
        `metric_fn` is given. Both means are taken over the number of batches.
    """
    track_metric = metric_fn is not None
    running_loss = 0
    running_metric = 0

    model.eval()
    model.to(device)

    # No gradients are needed for evaluation; skipping them conserves resources.
    with torch.no_grad():
        for inputs, targets in valid_dl:
            inputs, targets = xy_to_device(inputs, targets, device)
            outputs, batch_loss = forward_pass(inputs, targets, model, loss_fn)

            running_loss = running_loss + batch_loss.item()
            if track_metric:
                running_metric = running_metric + metric_fn(outputs, targets)

    num_batches = len(valid_dl)
    mean_loss = running_loss / num_batches
    if not track_metric:
        return mean_loss
    return mean_loss, running_metric / num_batches

def train(model, loss_fn, optimizer, num_epochs, train_dl, valid_dl, metric_fn, verbose = False, device = "cpu"):
    """Train `model` for `num_epochs` epochs, evaluating on `valid_dl` after each one.

    Args:
        model: Module to train.
        loss_fn: Loss callable handed to train_loop / evaluation_loop.
        optimizer: Optimizer handed to train_loop.
        num_epochs: Number of passes over `train_dl`.
        train_dl: Training batches.
        valid_dl: Validation batches.
        metric_fn: Callable (prediction, target) -> per-batch metric, or None
            to track the loss only.
        verbose: If truthy, print one summary line per epoch.
        device: Device handed to train_loop / evaluation_loop.

    Returns:
        Tuple (train_loss, train_acc, valid_loss, valid_acc) of per-epoch lists.
        When `metric_fn` is None the two *_acc lists contain None for each epoch.
    """
    train_loss, train_acc = [], []
    valid_loss, valid_acc = [], []
    has_metric = metric_fn is not None

    # Repeat the training for the specified number of epochs.
    for epoch in range(num_epochs):
        # Training loop.
        train_result = train_loop(model, loss_fn, optimizer, train_dl, metric_fn, device = device)
        # Evaluation loop.
        valid_result = evaluation_loop(model, loss_fn, valid_dl, metric_fn, device = device)

        # Both loops return a bare loss (not a tuple) when no metric function is
        # given, so the result can only be unpacked when a metric is tracked.
        epoch_train_loss, epoch_train_acc = train_result if has_metric else (train_result, None)
        epoch_valid_loss, epoch_valid_acc = valid_result if has_metric else (valid_result, None)

        train_loss.append(epoch_train_loss)
        train_acc.append(epoch_train_acc)
        valid_loss.append(epoch_valid_loss)
        valid_acc.append(epoch_valid_acc)

        if verbose:
            if has_metric:
                print("Epoch {:3d}: loss: {:.3f} accuracy: {:.3f}, val_loss: {:.3f} val_accuracy: {:.3f}.".format(
                    epoch + 1, epoch_train_loss, epoch_train_acc, epoch_valid_loss, epoch_valid_acc))
            else:
                print("Epoch {:3d}: loss: {:.3f} val_loss: {:.3f}.".format(
                    epoch + 1, epoch_train_loss, epoch_valid_loss))

    return train_loss, train_acc, valid_loss, valid_acc

In [None]:
# Seed PyTorch before the model is built so that its randomly initialised
# weights are the same on every run.
torch.manual_seed(42)

# Create the model, loss function, optimizer and metric objects.
model = PyTorchMnistCNNClassifier()
loss_fn = torch.nn.CrossEntropyLoss(reduction = "mean")
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

def accuracy(y_pred, y_true):
    """Return the fraction of rows of `y_pred` whose arg-max (over dim 1) equals `y_true`.

    The result is a 0-dimensional float tensor.
    """
    predicted_class = y_pred.argmax(dim = 1)
    hits = predicted_class.eq(y_true)
    return hits.float().mean()

# Train the model.
# The timer covers the seeding call and the whole training run.
start_time = time.time()
# Seeds PyTorch's random number generator for everything that runs from here on.
# `model` was already constructed earlier in this cell, so this particular call
# does not influence its initial weights.
torch.manual_seed(42)
num_epochs = 10
# Positional arguments after `accuracy`: verbose = True, device = device_name.
# `hist` is the tuple (train_loss, train_acc, valid_loss, valid_acc) of per-epoch lists.
hist = train(model, loss_fn, optimizer, num_epochs, train_dl, valid_dl, accuracy, True, device_name)
print("Time elapsed: {:.2f}s.".format(time.time() - start_time))

Epoch   1: loss: 0.181 accuracy: 0.945, val_loss: 0.059 val_accuracy: 0.983.
Epoch   2: loss: 0.063 accuracy: 0.981, val_loss: 0.044 val_accuracy: 0.986.
Epoch   3: loss: 0.046 accuracy: 0.986, val_loss: 0.047 val_accuracy: 0.986.
Epoch   4: loss: 0.034 accuracy: 0.989, val_loss: 0.038 val_accuracy: 0.990.
Epoch   5: loss: 0.028 accuracy: 0.991, val_loss: 0.037 val_accuracy: 0.989.
Epoch   6: loss: 0.023 accuracy: 0.993, val_loss: 0.038 val_accuracy: 0.990.
Epoch   7: loss: 0.019 accuracy: 0.994, val_loss: 0.035 val_accuracy: 0.991.
Epoch   8: loss: 0.016 accuracy: 0.995, val_loss: 0.046 val_accuracy: 0.991.
Epoch   9: loss: 0.016 accuracy: 0.995, val_loss: 0.040 val_accuracy: 0.991.
Epoch  10: loss: 0.012 accuracy: 0.997, val_loss: 0.045 val_accuracy: 0.990.
Time elapsed: 89.87s.


In [None]:
# Score the trained model on the held-out test batches. Because a metric function
# (accuracy) is passed, evaluation_loop returns a (loss, metric) pair.
test_loss, test_acc = evaluation_loop(model, loss_fn, test_dl, accuracy, device_name)

print("Test set loss: {:.3f}, test set accuracy: {:.3f}.".format(test_loss, test_acc))

Test set loss: 0.031, test set accuracy: 0.992.


In [None]:
# Predictions

def predict(model, dl, device = "cpu"):
    """Return the predicted class index for every sample in `dl`.

    The model is moved to `device` and run in evaluation mode with gradient
    tracking disabled; its previous train/eval mode is restored afterwards.

    Args:
        model: Module whose output has one score per class along dim 1.
        dl: Iterable of batches whose first element is the input tensor.
        device: Device on which the forward passes are run.

    Returns:
        A 1-D int64 tensor on the CPU holding the arg-max class of each sample,
        in iteration order (empty if `dl` yields no batches).
    """
    model.to(device)
    was_training = model.training
    # Evaluation mode disables dropout so the predictions are deterministic.
    model.eval()

    batch_preds = []
    try:
        with torch.no_grad():
            for batch in dl:
                x_bat = batch[0].to(device)
                # Bring each batch back to the CPU so the results can be
                # concatenated regardless of the device the model ran on.
                batch_preds.append(model(x_bat).argmax(dim = 1).to(torch.int64).cpu())
    finally:
        # Leave the model in whichever mode the caller had it in.
        model.train(was_training)

    if not batch_preds:
        return torch.empty(0, dtype = torch.int64)
    return torch.cat(batch_preds)

# Predict a class for every test image. No device is passed, so predict() uses
# its default of "cpu" and moves `model` there.
pred = predict(model, test_dl)

In [None]:
# Per-class precision/recall/F1 for the PyTorch model. This relies on `y_test`
# still being the labels loaded with categorize = False for the PyTorch section;
# the TensorFlow section below rebinds `y_test` to one-hot labels.
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       980
           1       1.00      1.00      1.00      1135
           2       0.99      0.99      0.99      1032
           3       0.99      1.00      0.99      1010
           4       1.00      0.99      0.99       982
           5       0.99      0.99      0.99       892
           6       1.00      0.99      0.99       958
           7       0.99      0.99      0.99      1028
           8       0.99      0.99      0.99       974
           9       0.99      0.99      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000



# TensorFlow

In [None]:
def make_tf_dataset(X, y, batch_size = 20, shuffle = False):
    """Build a batched tf.data.Dataset of (feature, label) pairs.

    Args:
        X: Features, sliced along the first axis.
        y: Labels, sliced along the first axis.
        batch_size: Number of samples per batch.
        shuffle: When True, shuffle with a buffer as large as the feature dataset
            before batching.

    Returns:
        The zipped (and optionally shuffled) dataset, batched.
    """
    feature_ds = tf.data.Dataset.from_tensor_slices(X)
    label_ds = tf.data.Dataset.from_tensor_slices(y)
    paired_ds = tf.data.Dataset.zip((feature_ds, label_ds))
    # A buffer covering the whole feature dataset gives a full shuffle.
    ordered_ds = paired_ds.shuffle(len(feature_ds)) if shuffle == True else paired_ds
    return ordered_ds.batch(batch_size)

def make_tf_datasets(X_train, y_train, X_valid, y_valid, X_test, y_test, batch_size = 20):
    """Build the train, validation and test tf.data datasets in one call.

    Only the training dataset is shuffled; all three use the same batch size.

    Returns:
        Tuple (train dataset, validation dataset, test dataset).
    """
    splits = (
        (X_train, y_train, True),
        (X_valid, y_valid, False),
        (X_test, y_test, False),
    )
    train_set, valid_set, test_set = (
        make_tf_dataset(features, labels, batch_size = batch_size, shuffle = reshuffle)
        for features, labels, reshuffle in splits
    )
    return train_set, valid_set, test_set

# Data for the TensorFlow model: images get a channel axis in "NHWC" position and
# labels are one-hot encoded (categorize = True).
# NOTE: this rebinds X_train ... y_test, replacing the arrays loaded for the PyTorch section.
batch_size = 20
X_train, y_train, X_valid, y_valid, X_test, y_test = download_mnist_data(channels = "NHWC", categorize = True)
# Wrap the three splits in batched tf.data datasets.
train_ds, valid_ds, test_ds = make_tf_datasets(X_train, y_train, X_valid, y_valid, X_test, y_test, batch_size = batch_size)

Loaded 50000 train, 10000 validation and 10000 test images.


In [None]:
class TFMnistCNNClassifier(tf.keras.Model):
    """CNN classifier: two conv/max-pool stages followed by two dense layers.

    The last dense layer uses a softmax activation, so the model outputs 10
    class probabilities per sample.
    """

    def __init__(self):
        super().__init__()
        # Tensor shape: [N, 28, 28, 1]
        self.conv1 = tf.keras.layers.Conv2D(filters = 16, kernel_size = 3, padding = "same", activation = "relu", input_shape = (28, 28, 1))
        self.pool1 = tf.keras.layers.MaxPool2D([2, 2])
        # Tensor shape: [N, 14, 14, 16]
        self.conv2 = tf.keras.layers.Conv2D(filters = 32, kernel_size = 3, padding = "same", activation = "relu")
        self.pool2 = tf.keras.layers.MaxPool2D([2, 2])
        # Tensor shape: [N, 7, 7, 32]
        self.flatten = tf.keras.layers.Flatten()
        # Tensor shape: [N, 1568]
        self.linear1 = tf.keras.layers.Dense(1024, activation = "relu")
        # Tensor shape: [N, 1024]
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.linear2 = tf.keras.layers.Dense(10, activation = "softmax")
        # Tensor shape: [N, 10]

    def call(self, x, training = False):
        """Run `x` through the network.

        Args:
            x: Batch of images.
            training: Forwarded to the dropout layer, which should only drop
                units while training.

        Returns:
            The output of the final softmax layer.
        """
        x = self.conv1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.pool2(x)

        x = self.flatten(x)
        x = self.linear1(x)
        # Pass the flag on explicitly instead of leaving `training` unused.
        x = self.dropout(x, training = training)
        x = self.linear2(x)
        return x

def make_tf_model(input_shape = [28, 28, 1]):
    """Wrap a fresh TFMnistCNNClassifier in a functional Keras model.

    Args:
        input_shape: Shape of one input sample, passed to tf.keras.layers.Input.

    Returns:
        A tf.keras.Model mapping an Input of `input_shape` to the classifier's output.
    """
    classifier = TFMnistCNNClassifier()
    # Calling the classifier on a symbolic Input ties it to the given input shape.
    model_input = tf.keras.layers.Input(input_shape)
    model_output = classifier(model_input)
    return tf.keras.Model(inputs = model_input, outputs = model_output)

def make_tf_model_on_device(device = "/device:CPU:0"):
    """Create and compile the TensorFlow classifier inside a tf.device scope.

    Args:
        device: TensorFlow device string under which the model is built and compiled.

    Returns:
        The compiled Keras model (categorical cross-entropy loss, Adam with
        learning rate 0.001, categorical accuracy metric).
    """
    with tf.device(device):
        tf_model = make_tf_model()
        # `metrics` is documented as a list of metrics, so the single metric is
        # wrapped in a list rather than passed as a bare object.
        tf_model.compile(loss = tf.keras.losses.CategoricalCrossentropy(),
                        optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
                        metrics = [tf.keras.metrics.CategoricalAccuracy()])
    return tf_model

In [None]:
# Build and compile the model on the device selected at the top of the notebook.
tf_model = make_tf_model_on_device(device = DEVICE_NAME)

# train_ds and valid_ds are already batched by make_tf_dataset, so no batch_size
# is passed: Keras documents that batch_size should not be specified for dataset inputs.
# NOTE: num_epochs is the value set in the PyTorch training cell.
history = tf_model.fit(train_ds, epochs = num_epochs, validation_data = valid_ds)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluate on the held-out test batches. The model was compiled with one loss and
# one metric, so the displayed result is presumably [loss, categorical accuracy].
tf_model.evaluate(test_ds)



[0.03002397157251835, 0.9922999739646912]

In [None]:
# Predicted class = index of the largest model output for each test image.
y_pred = tf_model.predict(test_ds).argmax(axis = 1)

# Recover the true class indices from the one-hot labels stored in test_ds,
# one batch at a time. (y_test.argmax(axis = 1) would give the same labels.)
y_true = np.concatenate([labels.numpy().argmax(axis = 1) for _, labels in test_ds])

# Summarise per-class precision, recall and F1 for the TensorFlow model.
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       980
           1       1.00      0.99      1.00      1135
           2       1.00      0.99      0.99      1032
           3       0.99      1.00      0.99      1010
           4       1.00      0.99      0.99       982
           5       0.99      0.99      0.99       892
           6       0.99      0.99      0.99       958
           7       0.99      0.99      0.99      1028
           8       0.98      0.99      0.99       974
           9       0.99      0.99      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000

