In [1]:
from tensorflow.keras.datasets import mnist

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import math

In [19]:
### normalizes array and changes datatype to np.float32
def normalize_arr(arr):
    """Min-max scale ``arr`` into the range [0, 1] as ``np.float32``.

    The minimum and maximum are taken over the whole array (not per axis),
    so every element is scaled with the same offset and range.

    Parameters
    ----------
    arr : array_like
        Numeric input of any shape.

    Returns
    -------
    np.ndarray
        Array of dtype ``np.float32`` with the same shape as ``arr``.
        A constant array (max == min) maps to all zeros, and an empty
        array is returned unchanged apart from the dtype conversion.
    """
    arr = np.asarray(arr).astype(np.float32)

    # np.min / np.max raise on empty input; there is nothing to scale anyway.
    if arr.size == 0:
        return arr

    arr_min = np.min(arr)
    value_range = np.max(arr) - arr_min

    # A constant array would otherwise divide 0 by 0 and yield all-NaN output.
    if value_range == 0:
        return np.zeros_like(arr)

    return (arr - arr_min) / value_range

### outputs list of NxN patches from an image
def get_patches(image, patch_size, pad_width, stride):
    """Extract all ``patch_size`` x ``patch_size`` windows from a zero-padded image.

    The last two axes of ``image`` are treated as the spatial axes. Any
    leading axes (e.g. a batch of images) are carried through unchanged,
    so a single 2-D image and a stack of images are both accepted.

    Parameters
    ----------
    image : array_like
        Array of shape ``(..., H, W)``.
    patch_size : int
        Side length of each square patch; must be >= 1.
    pad_width : int
        Number of zero rows/columns added on every side of the two spatial
        axes before patches are taken; must be >= 0.
    stride : int
        Step between the top-left corners of neighbouring patches; must be >= 1.

    Returns
    -------
    np.ndarray
        Array of shape ``(..., n_patches, patch_size, patch_size)`` with the
        dtype of ``image``. Patches are ordered row by row. ``n_patches`` is 0
        when the patch does not fit inside the padded image.

    Raises
    ------
    ValueError
        If ``image`` has fewer than two dimensions, or if ``patch_size``,
        ``stride`` or ``pad_width`` is out of range.
    """
    image = np.asarray(image)
    if image.ndim < 2:
        raise ValueError("image must have at least 2 dimensions, got %d" % image.ndim)
    if patch_size < 1 or stride < 1 or pad_width < 0:
        raise ValueError("patch_size and stride must be >= 1 and pad_width must be >= 0")

    # find shape of original image and shape of output
    *batch_shape, x_img, y_img = image.shape
    # Clamp at zero so an oversized patch consistently yields an empty result
    # rather than a negative dimension.
    x_out = max(((x_img - patch_size + 2 * pad_width) // stride) + 1, 0)
    y_out = max(((y_img - patch_size + 2 * pad_width) // stride) + 1, 0)

    # pad only the two spatial axes; leading (batch) axes are left untouched
    pad_spec = [(0, 0)] * len(batch_shape) + [(pad_width, pad_width)] * 2
    image = np.pad(image, pad_width=pad_spec, mode='constant', constant_values=0)

    # find all patches and place into output matrix
    output = np.empty((*batch_shape, x_out, y_out, patch_size, patch_size), dtype=image.dtype)

    for x in range(x_out):
        x_start = x * stride
        for y in range(y_out):
            y_start = y * stride
            # The ellipsis copies this window for every leading index at once.
            output[..., x, y, :, :] = image[..., x_start:(x_start + patch_size), y_start:(y_start + patch_size)]

    # flatten the (x_out, y_out) grid into a single list-like patch axis
    return output.reshape(*batch_shape, x_out * y_out, patch_size, patch_size)

In [24]:
### load mnist dataset and data preprocessing
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print('original   shape:', x_train.shape)
print('           dtype:', x_train.dtype)

# Scale pixel values to [0, 1] as float32. x_train is reloaded above on every
# run, so re-executing this cell gives the same result.
x_train = normalize_arr(x_train)

print('normalized shape:', x_train.shape)
print('           dtype:', x_train.dtype)

# patch extraction settings
patch_size = 3
pad_width = 1
stride = 1

# NOTE: placeholder only. np.empty leaves the memory uninitialised, so the
# values in x_train_patches are arbitrary until real patches are written.
x_train_patches = np.empty((x_train.shape[0]), dtype=x_train.dtype)
print(x_train_patches.shape)

# TODO: replace the placeholder above with the actual patches, e.g.
# x_train_patches = get_patches(x_train, patch_size, pad_width, stride)

original   shape: (60000, 28, 28)
           dtype: uint8
normalized shape: (60000, 28, 28)
           dtype: float32
(60000,)
