In [1]:
# Set seeds for reproducibility.
# NumPy and TensorFlow are seeded separately, each through its own seeding
# function, so both libraries start from a fixed state on every run.
from numpy.random import seed
seed(888)  # fixed seed for NumPy's global random generator
from tensorflow.random import set_seed
set_seed(404)  # fixed global seed for TensorFlow

In [2]:
import tensorflow as tf
import numpy as np
import os

In [3]:
# CONSTANTS

# Relative locations of the MNIST CSV files: pixel data (X) and digit labels (Y).
# -- training split
X_TRAIN_PATH = 'MNIST_Dataset/digit_xtrain.csv'
Y_TRAIN_PATH = 'MNIST_Dataset/digit_ytrain.csv'
# -- test split
X_TEST_PATH = 'MNIST_Dataset/digit_xtest.csv'
Y_TEST_PATH = 'MNIST_Dataset/digit_ytest.csv'

# Number of target classes: one per digit, 0 through 9.
NUM_CLASSES = 10
# Number of samples carved out of the training data to form the validation set.
VALIDATION_SIZE = 10000

## *Load Data*

In [4]:
%%time

# Parse the comma-separated label files into integer arrays and report
# each array's shape right after it is loaded.

# Training labels.
y_train_data = np.loadtxt(fname=Y_TRAIN_PATH, dtype=int, delimiter=',')
print('Y Train shape: ', y_train_data.shape)

# Test labels.
y_test = np.loadtxt(fname=Y_TEST_PATH, dtype=int, delimiter=',')
print('Y Test shape: ', y_test.shape)

Y Train shape:  (60000,)
Y Test shape:  (10000,)
CPU times: user 214 ms, sys: 19.7 ms, total: 234 ms
Wall time: 232 ms


In [5]:
%%time

# Parse the comma-separated pixel files into integer arrays. These files are
# much larger than the label files, so the shape printed after each load also
# serves as a progress marker.

# Training pixels.
x_train_data = np.loadtxt(fname=X_TRAIN_PATH, dtype=int, delimiter=',')
print('X Train shape: ', x_train_data.shape)

# Test pixels.
x_test = np.loadtxt(fname=X_TEST_PATH, dtype=int, delimiter=',')
print('X Test shape: ', x_test.shape)

X Train shape:  (60000, 784)
X Test shape:  (10000, 784)
CPU times: user 27.8 s, sys: 1.15 s, total: 28.9 s
Wall time: 28.9 s


---

## *Data preprocessing*

In [6]:
# Re-scale the pixel data from 0-255 to 0-1.
#
# This cell overwrites its own inputs, so it must be safe to run more than
# once. The raw arrays are loaded with an integer dtype and become floating
# point once processed, so an integer dtype marks data that has not been
# transformed yet. Without these guards a second run would divide the pixels
# by 255 again and fail when indexing np.eye with float one-hot labels.
if np.issubdtype(x_train_data.dtype, np.integer):
    x_train_data = x_train_data / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test / 255.0

# Convert target values (labels) to one-hot encoding: row k of the identity
# matrix is the one-hot vector for class k, so indexing it with the integer
# labels yields one encoded row per sample.
if np.issubdtype(y_train_data.dtype, np.integer):
    y_train_data = np.eye(NUM_CLASSES)[y_train_data]
print('Y Train shape: ', y_train_data.shape)

if np.issubdtype(y_test.dtype, np.integer):
    y_test = np.eye(NUM_CLASSES)[y_test]
print('Y Test shape: ', y_test.shape)

Y Train shape:  (60000, 10)
Y Test shape:  (10000, 10)


### *Create validation dataset from training data*

In [7]:
# Hold out the first VALIDATION_SIZE samples for validation and keep the
# remainder for training. Features and labels are cut at the same index so
# the rows stay aligned.
x_val, x_train = x_train_data[:VALIDATION_SIZE], x_train_data[VALIDATION_SIZE:]
y_val, y_train = y_train_data[:VALIDATION_SIZE], y_train_data[VALIDATION_SIZE:]

# Show the resulting shapes to confirm the split sizes.
print('Validation shape: ', x_val.shape, y_val.shape)
print('New training shape: ', x_train.shape, y_train.shape)


Validation shape:  (10000, 784) (10000, 10)
New training shape:  (50000, 784) (50000, 10)


---