In [None]:
import os
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
# Passed as `block=` to plt.show() in visualize_samples; False keeps plotting non-blocking.
block_plot = False

# Seed Python's `random`, NumPy's legacy global RNG and TensorFlow's global RNG
# with the same value so repeated runs start from the same random state.
SEED_VALUE = 42
random.seed(SEED_VALUE)
np.random.seed(SEED_VALUE)
tf.random.set_seed(SEED_VALUE)


# Record the TensorFlow version in the cell output for reproducibility.
print(tf.__version__)


In [None]:
def sys_config():
    """Report the available GPUs and configure TensorFlow to use the first one.

    When at least one physical GPU is listed, this sets the CUDA/cuDNN related
    environment variables, restricts TensorFlow to the first GPU and enables
    memory growth on it. When no GPU is listed it only reports that the CPU
    is used.

    TensorFlow raises ``RuntimeError`` from ``set_visible_devices`` /
    ``set_memory_growth`` once the runtime has been initialized (for example
    when this notebook cell is re-run after tensors were created). That error
    is reported instead of propagated so the cell stays re-runnable.

    Returns:
        None
    """
    gpu_devices = tf.config.list_physical_devices('GPU')
    print(gpu_devices)

    if not gpu_devices:
        print('Using CPU')
        return

    print('Using GPU')
    # NOTE(review): these variables are set after TensorFlow has been imported
    # and the devices have been listed; they may not influence the current
    # process anymore -- confirm, or export them before importing TensorFlow.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    os.environ['TF_USE_CUDNN'] = "true"

    try:
        tf.config.experimental.set_visible_devices(gpu_devices[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpu_devices[0], True)
    except RuntimeError as err:
        # Device configuration can only be changed before the runtime is
        # initialized; keep the existing configuration and carry on.
        print(f'GPU configuration left unchanged: {err}')

In [None]:
def get_data():
    """Load the MNIST dataset through ``tf.keras.datasets.mnist.load_data()``.

    Returns:
        A pair ``((X_train, y_train), (X_test, y_test))`` holding the training
        split and the test split, each as an ``(images, labels)`` pair.
    """
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    images_train, labels_train = train_split
    images_test, labels_test = test_split
    return ((images_train, labels_train), (images_test, labels_test))

In [None]:
def proprocess_data(train_set, test_set, resize_to=None, num_classes=10, seed=42):
    """Split, reshape, optionally resize, one-hot encode and normalize image data.

    Args:
        train_set: ``(X_train, y_train)`` pair of images and labels.
        test_set: ``(X_test, y_test)`` pair of images and labels.
        resize_to: Target spatial size passed to ``tf.image.resize``. An int
            ``s`` is treated as ``(s, s)``. Any falsy value (``None``,
            ``False``, ``0``) skips resizing.
        num_classes: Expected number of classes; also the one-hot depth.
        seed: ``random_state`` for the stratified train/validation split.

    Returns:
        ``(X_train, y_train), (X_validate, y_validate), (X_test, y_test)``.
        Images are float32 tensors divided by 255; labels are one-hot encoded
        unless they were already 2-D on input.

    Raises:
        AssertionError: If the number of classes found in the training labels
            differs from ``num_classes``.
    """
    (X_train, y_train) = train_set
    (X_test, y_test) = test_set

    # Hold out 10% of the training data (stratified by label) for validation.
    X_train, X_validate, y_train, y_validate = train_test_split(
        X_train,
        y_train,
        shuffle=True,
        stratify=y_train,
        test_size=0.1,
        random_state=seed,
    )

    # Images without a channel axis get one appended so every split has four
    # axes. Each split is expanded from its OWN data.
    if len(X_train.shape) != 4:
        X_train = tf.expand_dims(X_train, axis=3)
        X_validate = tf.expand_dims(X_validate, axis=3)
        X_test = tf.expand_dims(X_test, axis=3)

    # Resize only when a size was requested.
    if resize_to:
        if isinstance(resize_to, int):
            resize_to = (resize_to, resize_to)
        X_train = tf.image.resize(X_train, resize_to)
        X_validate = tf.image.resize(X_validate, resize_to)
        X_test = tf.image.resize(X_test, resize_to)

    n_train = X_train.shape[0]
    n_validate = X_validate.shape[0]
    n_test = X_test.shape[0]

    print('\n')
    print('There are {} training examples'.format(n_train))
    print('There are {} validation examples'.format(n_validate))
    print('There are {} test examples'.format(n_test))

    # Labels that are not 2-D are treated as class indices and counted by
    # unique value; 2-D labels are treated as already one-hot, so the class
    # count is their second dimension.
    labels_need_encoding = len(y_train.shape) != 2
    if labels_need_encoding:
        classes_found = len(np.unique(y_train))
    else:
        classes_found = y_train.shape[1]
    assert num_classes == classes_found, "Mis-match in number of classes."
    print('There are {} classes'.format(num_classes))

    # One hot encoding of the output classes
    if labels_need_encoding:
        y_train = tf.one_hot(y_train, num_classes)
        y_validate = tf.one_hot(y_validate, num_classes)
        y_test = tf.one_hot(y_test, num_classes)

    print('\nData split:\n')
    print(f'X_train: {X_train.shape}, y_train: {y_train.shape}')
    print(f'X_validate: {X_validate.shape}, y_validate: {y_validate.shape}')
    print(f'X_test: {X_test.shape}, y_test: {y_test.shape}')

    # Normalize the data: cast to float32 and divide by 255.
    X_train = tf.cast(X_train, tf.float32) / 255.0
    X_validate = tf.cast(X_validate, tf.float32) / 255.0
    X_test = tf.cast(X_test, tf.float32) / 255.0

    print('Ground truth has been one hot encoded')
    print(np.transpose(y_train[:9]))

    return (X_train, y_train), (X_validate, y_validate), (X_test, y_test)

In [None]:
def visualize_samples(X, y):
    """Show up to eight leading samples of ``X`` in a 2x4 grid with their labels.

    Args:
        X: Indexable collection of images accepted by ``imshow``.
        y: Indexable collection of labels aligned with ``X``; each label is
            rendered with ``str()`` as the title of its image.

    The figure is shown with ``plt.show(block=block_plot)`` (``block_plot`` is
    a module-level flag) and closed afterwards.
    """
    n_rows, n_cols = 2, 4
    # Never index past the end when fewer than eight samples are supplied.
    n_show = min(n_rows * n_cols, len(X), len(y))

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 8))
    for i, ax in enumerate(axes.flat):
        # Hide ticks, grid and frame for every cell, including unused ones.
        ax.axis("off")
        if i >= n_show:
            continue
        ax.imshow(X[i], cmap='gray')
        # A title stays visible with the axis switched off, whereas an axis
        # label (xlabel) is hidden together with the axis.
        ax.set_title(str(y[i]))

    fig.suptitle('Dataset Samples', fontsize=18)
    fig.subplots_adjust(wspace=0.2, hspace=0.2)
    plt.show(block=block_plot)
    plt.close(fig)

In [None]:
# MAIN
# Configure the device, load MNIST, preprocess it and preview a few samples.
sys_config()
(X_train, y_train), (X_test, y_test) = get_data()

# Bind the preprocessed splits to their own names so later cells can use them
# while the raw arrays stay available for inspection.
(
    (X_train_prep, y_train_prep),
    (X_validate_prep, y_validate_prep),
    (X_test_prep, y_test_prep),
) = proprocess_data((X_train, y_train), (X_test, y_test), resize_to=None)

# Preview the raw test images together with their integer labels.
visualize_samples(X_test, y_test)