# Data preparation

In [1]:
# Remote location of the zipped Fashion-MNIST images
URL = 'https://nnfs.io/datasets/fashion_mnist_images.zip'
# Local filename the downloaded archive is saved under
FILE = 'fashion_mnist_images.zip'
# Directory the archive is extracted into
FOLDER = 'examples/fashion_mnist/data/'

In [2]:
import os
import urllib
import urllib.request

In [3]:
# Download the archive only when it is not already present locally.
if not os.path.isfile(FILE):
    print(f"Downloading {URL} and saving as {FILE}...")
    # Fetch into a temporary name and rename once complete, so an interrupted
    # transfer never leaves a truncated FILE that the isfile() guard above
    # would accept on the next run.
    _partial_file = FILE + '.part'
    urllib.request.urlretrieve(URL, _partial_file)
    os.replace(_partial_file, FILE)

In [4]:
from zipfile import ZipFile

# Announce the step, then unpack every member of the archive into FOLDER.
print("Unzipping images...")
with ZipFile(FILE, mode='r') as zip_images:
    zip_images.extractall(path=FOLDER)

Unzipping images...


# Data loading

In [10]:
import os

# Root folder of the extracted dataset; the two split folders live below it.
DATA_DIR = 'examples/fashion_mnist/data'
TRAIN_DIR = f'{DATA_DIR}/train'
TEST_DIR = f'{DATA_DIR}/test'

# Entries of the training folder
labels = os.listdir(TRAIN_DIR)

# Entries of the '0' sub-folder of the training folder
files = os.listdir(f'{TRAIN_DIR}/0')

In [11]:
import cv2
import numpy as np
# Widen NumPy's print width so printed array rows are less likely to wrap.
np.set_printoptions(linewidth=200)

# Read a single training image exactly as stored on disk.
sample_image_path = TRAIN_DIR + '/7/0002.png'
image_data = cv2.imread(sample_image_path, cv2.IMREAD_UNCHANGED)

In [13]:
def load_mnist_dataset(dataset,path):
    """Load every image of one dataset split together with its class label.

    The split is read from ``<path>/<dataset>/<label>/<image file>``; each
    ``<label>`` directory name must be convertible to an unsigned 8-bit
    integer.

    Args:
        dataset: Name of the split sub-directory, e.g. ``'train'`` or ``'test'``.
        path: Root directory that contains the split sub-directories.

    Returns:
        A tuple ``(X, y)``. ``X`` is a NumPy array built from the images as
        returned by ``cv2.imread(..., cv2.IMREAD_UNCHANGED)``; ``y`` is a
        ``uint8`` array holding the label of each image, in the same order.

    Raises:
        ValueError: If an image file cannot be read, or a label directory
            name is not an integer.
    """
    split_dir = os.path.join(path, dataset)

    # Scan all the directories and create a list of labels. Sorting makes the
    # sample order reproducible (os.listdir returns entries in arbitrary
    # order); stray non-directory entries are skipped.
    labels = sorted(
        entry for entry in os.listdir(split_dir)
        if os.path.isdir(os.path.join(split_dir, entry))
    )

    X, y = [], []

    for label in labels:
        label_dir = os.path.join(split_dir, label)
        for file in sorted(os.listdir(label_dir)):
            image_path = os.path.join(label_dir, file)
            image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)

            # cv2.imread reports failure by returning None rather than
            # raising; fail loudly here instead of corrupting the array.
            if image is None:
                raise ValueError(f"Could not read image: {image_path}")

            X.append(image)
            y.append(label)

    # Convert the data to proper numpy arrays and return
    return np.array(X), np.array(y).astype('uint8')

def create_data_mnist(path):
    """Load the 'train' and 'test' splits found under ``path``.

    Args:
        path: Root directory handed to ``load_mnist_dataset`` for both splits.

    Returns:
        A 4-tuple ``(X, y, X_test, y_test)``: samples and labels of the
        training split followed by samples and labels of the test split.
    """
    # Training split first, then the test split
    train_samples, train_labels = load_mnist_dataset('train', path)
    test_samples, test_labels = load_mnist_dataset('test', path)

    return train_samples, train_labels, test_samples, test_labels

# Load the training and test splits stored under DATA_DIR
X, y, X_test, y_test = create_data_mnist(DATA_DIR)

# Data preprocessing

In [14]:
# Scale features between -1 and 1 (maps 0 -> -1 and 255 -> 1).
# NOTE: this cell overwrites X / X_test, so re-running it without reloading
# the data rescales values that were already scaled.
X = (X.astype(np.float32) - 127.5) / 127.5
X_test = (X_test.astype(np.float32) - 127.5) / 127.5

# Ensuring that scaling is correct. Both bounds must sit in the asserted
# expression: in `assert a, b` the part after the comma is only the failure
# message, so it would never be checked.
assert -1 <= X.min() and X.max() <= 1, "X is outside [-1, 1] after scaling"
assert -1 <= X_test.min() and X_test.max() <= 1, "X_test is outside [-1, 1] after scaling"

In [15]:
# Show the dimensions of the training array
X.shape

(60000, 28, 28)

In [16]:
# Flatten every sample into a 1-D vector. The sample count is taken from the
# array itself rather than hard-coded, so this works for any dataset size and
# matches how X_test is handled.
X = X.reshape(X.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Data shuffling