In [None]:
# imports
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

import keras
from keras import layers

def plot_sample_images(images, size = 100):
    """Draw the first `size` images on a near-square grid of subplots.

    The images are drawn on matplotlib's current figure through the pyplot
    state machine, so a following `plt.suptitle(...)` call applies to the
    same figure.

    Parameters
    ----------
    images : sequence supporting slicing, `len()` and integer indexing
        Image data; each element is handed to `plt.imshow` unchanged.
    size : int, default 100
        Maximum number of images to draw. Zero or negative values draw nothing.

    Returns
    -------
    None
    """
    # Clamp so a negative size means "nothing" rather than "all but the last n".
    samples = images[:max(size, 0)]
    count = len(samples)
    if count == 0:
        return

    # Size the grid from the images actually available, not the requested
    # maximum, so a short input is not shrunk into a mostly empty grid.
    grid = int(np.ceil(np.sqrt(count)))
    for idx in range(count):
        plt.subplot(grid, grid, idx + 1)
        plt.imshow(samples[idx])
        plt.axis('off')

In [None]:
# Read the raw train and test CSV files.
train_df = pd.read_csv("./data/input/train.csv")
test_df = pd.read_csv("./data/input/test.csv")

# Every column other than the target column is used as a pixel feature.
label = 'label'
pixels = train_df.columns.drop(label).tolist()

X, y = train_df[pixels], train_df[label]

# Turn each flat row into a 28x28 single-channel image array.
image_shape = (-1, 28, 28, 1)
X_reshaped = X.to_numpy().reshape(image_shape)
X_test_unseen = test_df[pixels].to_numpy().reshape(image_shape)

# The count of distinct label values is used as the one-hot depth.
n_labels = len(y.unique())

y_onehot = tf.one_hot(y, depth=n_labels).numpy()

## Image Resizing and Rescaling

In [None]:
# Preprocessing pipeline: resize to 24x24, then multiply pixel values by 1/255.
resize_and_rescale = keras.Sequential()
resize_and_rescale.add(layers.Resizing(24, 24))
resize_and_rescale.add(layers.Rescaling(1./255.))

# Run the same pipeline over the training images and the unseen test images.
X_resized_and_rescaled = resize_and_rescale(X_reshaped)
X_test_unseen_resized_and_rescaled = resize_and_rescale(X_test_unseen)

### Plotting: Resized and Rescaled Images

In [None]:
# Show the untouched 28x28 inputs. Plotting the processed tensor here would
# make this figure identical to the "After Resizing and Rescaling" one.
plot_sample_images(X_reshaped, 9)
plt.suptitle("Before Resizing and Rescaling")

In [None]:
# First nine images as they come out of the resize/rescale pipeline.
plot_sample_images(X_resized_and_rescaled, size=9)
plt.suptitle("After Resizing and Rescaling")

### Save: Resized and Rescaled Images

In [None]:
# Timestamp suffix so repeated runs do not overwrite earlier exports.
now = datetime.now().strftime("%Y%m%d_%H%M%S")

# np.savez_compressed does not create missing directories, so make sure the
# output directory exists before writing.
transformed_dir = Path("./data/transformed")
transformed_dir.mkdir(parents=True, exist_ok=True)

# Save transformed data - train (X, Y)
np.savez_compressed(transformed_dir / f"train_transformed_{now}", X = X_resized_and_rescaled, y = y_onehot)

# Save transformed data - test unseen (X)
np.savez_compressed(transformed_dir / f"test_unseen_transformed_{now}", X = X_test_unseen_resized_and_rescaled)

## Image Augmentation

In [None]:
# Multiply raw pixel values by 1/255 before any augmentation is applied.
rescaler = keras.Sequential([layers.Rescaling(1./255.)])

# NOTE(review): the second entry was an unfinished `layers.` expression, which
# made this cell a syntax error. A small seeded random rotation is used as a
# placeholder augmentation; confirm the intended layer and its parameters.
data_augmenter = keras.Sequential(
    [
        layers.CenterCrop(24, 24),
        layers.RandomRotation(0.05, seed=42),
    ]
)

preprocessor = keras.Sequential(
    [
        rescaler,
        data_augmenter
    ]
)

# Keras random augmentation layers only transform their input in training
# mode; without training=True the rotation above would be skipped.
X_augmented = preprocessor(X_reshaped, training=True)

### Plotting: Augmented Images

In [None]:
# First nine images before they pass through the augmentation pipeline.
plot_sample_images(X_reshaped, size=9)
plt.suptitle("Before Augmentation")

In [None]:
# The same nine positions after the augmentation pipeline has been applied.
plot_sample_images(X_augmented, size=9)
plt.suptitle("After Augmentation")

### Save: Augmented Images

In [None]:
# Fresh timestamp suffix so repeated runs do not overwrite earlier exports.
now = datetime.now().strftime("%Y%m%d_%H%M%S")

# np.savez_compressed does not create missing directories, so make sure the
# output directory exists before writing.
augmented_dir = Path("./data/transformed/augmented")
augmented_dir.mkdir(parents=True, exist_ok=True)

# Save train augmented data (X, y)
np.savez_compressed(augmented_dir / f"train_augmented_{now}", X = X_augmented, y = y_onehot)