In [1]:
import os

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

from tensorflow.keras.datasets import mnist

In [2]:
# Toggle for the final cell: when True, the train/val/test arrays are written to
# .npy files under data/; set to False to run the notebook without touching disk.
SAVE_DATA = True

In [3]:
def _scale_and_add_channel(images):
    """Return `images` as float32, shifted and scaled by 127.5, with a trailing axis of size 1.

    The affine map sends a pixel value of 0 to -1 and a value of 255 to 1.
    """
    scaled = (images.astype(np.float32) - 127.5) / 127.5
    # Append a singleton last dimension: (..., H, W) -> (..., H, W, 1).
    return scaled.reshape((*scaled.shape, 1))


# Labels (y, y_test) are kept exactly as returned by the loader.
(X, y), (X_test, y_test) = mnist.load_data()

X = _scale_and_add_channel(X)
X_test = _scale_and_add_channel(X_test)

## **Build Dataset**

In [4]:
# Hold out a stratified validation set with as many samples as the test set;
# the fixed random_state keeps the split reproducible across runs.
spliter = StratifiedShuffleSplit(
    n_splits=1,
    test_size=X_test.shape[0],
    random_state=123,
)

# n_splits=1, so the split generator yields a single (train, held-out) index pair.
train_idx, test_idx = next(spliter.split(X, y))
X_train, X_val = X[train_idx], X[test_idx]
y_train, y_val = y[train_idx], y[test_idx]

print(f"Train data shape: {X_train.shape} Val data shape: {X_val.shape} Test data shape: {X_test.shape}")

Train data shape: (50000, 28, 28, 1) Val data shape: (10000, 28, 28, 1) Test data shape: (10000, 28, 28, 1)


## **Save the data to the `data/` folder**

In [5]:
if SAVE_DATA:
    # Create the output folder if it is missing so the notebook runs end to end
    # on a fresh checkout instead of failing with FileNotFoundError.
    os.makedirs('data', exist_ok=True)

    # Each file stores two arrays written back to back: images first, labels second.
    # To read them back, open the file once and call np.load twice on the same handle.
    for split_path, split_X, split_y in (
        ('data/train.npy', X_train, y_train),
        ('data/val.npy', X_val, y_val),
        ('data/test.npy', X_test, y_test),
    ):
        with open(split_path, mode='wb') as outfile:
            np.save(outfile, split_X, allow_pickle=True)
            np.save(outfile, split_y, allow_pickle=True)