In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import matplotlib.pyplot as plt

print("Current version of tensorflow:", tf.__version__)
# Input data files are available in the read-only "../input/" directory.
# The loop below prints the full path of every file found under it, so the
# exact CSV locations used later in the notebook can be read off the output.

import os
for input_path in (
    os.path.join(root, fname)
    for root, _subdirs, fnames in os.walk('/kaggle/input')
    for fname in fnames
):
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Introduction

In this notebook I will use the Fashion-MNIST dataset to predict the class of each image.

This is a dataset of **60,000** $28 \times 28$ grayscale training images from 10 fashion categories, along with a test set of 10,000 images. This dataset can be used as a drop-in replacement for MNIST. The class labels are:

|Label|	Description|
|:---:|:---:|
|0|	T-shirt/top|
|1|	Trouser|
|2|	Pullover|
|3|	Dress|
|4|	Coat|
|5|	Sandal|
|6|Shirt|
|7|	Sneaker|
|8|	Bag|
|9|	Ankle boot|

In this notebook I will build an MLP model and a CNN model. The challenge is that the CNN model will have far fewer parameters than the MLP model and should still be more accurate.

### Exploring the Dataset

In [None]:
# Human-readable class names; the list position is the integer label
# stored in the dataset's "label" column.
klass = [
    "T-shirt/top",  # 0
    "Trousers",     # 1
    "Pullover",     # 2
    "Dress",        # 3
    "Coat",         # 4
    "Sandal",       # 5
    "Shirt",        # 6
    "Sneaker",      # 7
    "Bag",          # 8
    "Ankle Boot",   # 9
]

In [None]:
# Read the train and test splits; each CSV has a "label" column
# (used below) alongside the pixel columns.
df_train = pd.read_csv(
    "/kaggle/input/minst-fashion-dataset/fashion-mnist_train.csv"
)
df_test = pd.read_csv(
    "/kaggle/input/minst-fashion-dataset/fashion-mnist_test.csv"
)

# Preview the first five training rows (5 is the default for head()).
df_train.head()

In [None]:
# Show 25 randomly drawn training images in a 5 x 5 grid, each titled
# with its class name.
images = df_train.sample(25)

plt.figure(figsize=(15,15))
for slot, (_, row) in enumerate(images.iterrows(), start=1):
    ax = plt.subplot(5, 5, slot)

    # Split the row into its label and its pixel values, then fold the
    # flat pixel vector back into a 28 x 28 picture.
    lbl = row["label"]
    im = row.drop(["label"]).to_numpy().reshape(28, 28)

    ax.imshow(im, cmap="gray")
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(klass[lbl])


### Creating MLP Model

The following layers would be used

1. Flatten
2. 2 x Dense
3. BatchNormalization
4. 2 x Dense
5. Dropout
6. 1 x Dense

**Data preprocessing**

In [None]:
# Turn the flat CSV rows into image tensors.
#
# * `reshape(-1, 28, 28)` lets NumPy infer the number of samples instead of
#   hard-coding 60000 / 10000, so the cell also works on a subsampled or
#   differently sized split.
# * Pixels are converted to float32 and scaled to [0, 1] before being fed to
#   the networks. NOTE(review): this assumes 8-bit intensities (0-255), as in
#   the standard Fashion-MNIST CSV export -- confirm against the data.
X_train = df_train.drop(["label"], axis=1).to_numpy(dtype="float32").reshape(-1, 28, 28) / 255.0
Y_train = df_train["label"].to_numpy()

X_test = df_test.drop(["label"], axis=1).to_numpy(dtype="float32").reshape(-1, 28, 28) / 255.0
Y_test = df_test["label"].to_numpy()

# Sanity check: features are (n_samples, 28, 28), labels are (n_samples,).
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

In [None]:
# Fully connected baseline: flatten the image, pass it through four wide
# ReLU layers (with batch normalisation in the middle and dropout before
# the head), and finish with a 10-way softmax.
mlp_layers = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=512, activation="relu"),
    tf.keras.layers.Dense(units=1024, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=1024, activation="relu"),
    tf.keras.layers.Dense(units=512, activation="relu"),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(units=10, activation="softmax"),
]
mlp_model = tf.keras.models.Sequential(mlp_layers, name="fashion_mnist_mlp_model")

# Build explicitly so the summary can report parameter counts before training.
mlp_model.build(input_shape=(None, 28, 28))
mlp_model.summary()

**Compiling the model**

In [None]:
# Labels are integer class ids, hence the sparse cross-entropy loss.
mlp_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["acc"],
)

Finally, training the MLP model

In [None]:
# Train for 25 epochs, holding out 30% of the training data for validation.
mlp_history = mlp_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=25,
    validation_split=0.3,
)

Let's visualize the training of mlp model

In [None]:
# Per-epoch curves recorded by Keras while fitting the MLP.
loss = mlp_history.history["loss"]
acc = mlp_history.history["acc"]

val_loss = mlp_history.history["val_loss"]
val_acc = mlp_history.history["val_acc"]

fig, axes = plt.subplots(1, 2, figsize=(35, 8))

# Left panel: metrics on the training portion of the data.
axes[0].plot(loss, label="Loss")
axes[0].plot(acc, label="Accuracy")
axes[0].set_title("Training Metrics")
axes[0].legend()
axes[0].set_xlabel("Epochs")

# Right panel: metrics on the held-out validation split.
# Fix: this panel previously plotted `acc` (training accuracy) instead of
# `val_acc`, so the validation accuracy was never shown. The title now says
# "Validation" because these curves come from `validation_split`, not from
# the test set.
axes[1].plot(val_loss, label="Loss")
axes[1].plot(val_acc, label="Accuracy")
axes[1].set_title("Validation Metrics")
axes[1].legend()
axes[1].set_xlabel("Epochs")
axes[1].set_ylim([0, 1])
plt.show()

Let's evaluate the model

In [None]:
# Score the trained MLP on the test set; evaluate() yields the loss followed
# by the compiled metric, and only the accuracy is reported.
_, acc = mlp_model.evaluate(X_test, Y_test)
mlp_accuracy_pct = acc * 100
print("Accuracy of MLP Model: %.2f" % mlp_accuracy_pct + "%")

### Creating CNN Model

The layers for this model will go like this

1. 1 x Conv2D
2. 1 x MaxPool2D
3. 1 x Conv2D
4. 1 x MaxPool2D
5. 1 x Conv2D
6. 1 x Flatten
7. 2 x Dense
8. 1 x Dropout
9. 1 x Dense

In [None]:
# Convolutional feature extractor: three 3x3 conv layers with doubling
# filter counts, with max-pooling after the first two.
cnn_feature_layers = [
    tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation="relu"),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu"),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu"),
]

# Classifier head: two L2-regularised dense layers, dropout, then a 10-way
# softmax. Each dense layer gets its own regulariser instance.
cnn_classifier_layers = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        units=256,
        activation="relu",
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    ),
    tf.keras.layers.Dense(
        units=512,
        activation="relu",
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    ),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(units=10, activation="softmax"),
]

cnn_model = tf.keras.models.Sequential(
    cnn_feature_layers + cnn_classifier_layers,
    name="fashion_mnist_cnn_model",
)

# Conv2D needs an explicit channel axis, hence the trailing 1.
cnn_model.build(input_shape=(None, 28, 28, 1))
cnn_model.summary()

Huh, what is going to happen with only about 300 thousand params (versus roughly 2.5 million in the MLP)?

Let's see

**Data Preprocessing**

In [None]:
# Give every image an explicit single-channel axis, as the CNN was built for
# inputs of shape (28, 28, 1).
#
# `reshape(-1, 28, 28, 1)` is used instead of `X[..., np.newaxis]` so the
# cell is idempotent: re-running it leaves an already 4-D array unchanged,
# whereas `np.newaxis` would append another axis on every execution and
# break the later `fit` / `evaluate` calls.
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)
print(X_train.shape)
print(X_test.shape)

In [None]:
# Same loss, optimiser and metric as the MLP so the two models are comparable.
cnn_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["acc"],
)

In [None]:
# Same training schedule as the MLP: 25 epochs, batches of 32, 30% held out.
cnn_history = cnn_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=25,
    validation_split=0.3,
)

Evaluating the model

In [None]:
# Score the trained CNN on the test set; evaluate() yields the loss followed
# by the compiled metric, and only the accuracy is reported.
_, acc = cnn_model.evaluate(X_test, Y_test)
cnn_accuracy_pct = acc * 100
print("Accuracy of CNN Model: %.2f" % cnn_accuracy_pct + "%")

In [None]:
# Visualizing the training of the model

In [None]:
# Per-epoch curves recorded by Keras while fitting the CNN.
loss = cnn_history.history["loss"]
acc = cnn_history.history["acc"]

val_loss = cnn_history.history["val_loss"]
val_acc = cnn_history.history["val_acc"]

fig, axes = plt.subplots(1, 2, figsize=(35, 8))

# Left panel: metrics on the training portion of the data.
axes[0].plot(loss, label="Loss")
axes[0].plot(acc, label="Accuracy")
axes[0].set_title("Training Metrics")
axes[0].legend()
axes[0].set_xlabel("Epochs")

# Right panel: metrics on the held-out validation split.
# Fix: this panel previously plotted `acc` (training accuracy) instead of
# `val_acc`, so the validation accuracy was never shown. The title now says
# "Validation" because these curves come from `validation_split`, not from
# the test set.
axes[1].plot(val_loss, label="Loss")
axes[1].plot(val_acc, label="Accuracy")
axes[1].set_title("Validation Metrics")
axes[1].legend()
axes[1].set_xlabel("Epochs")
axes[1].set_ylim([0, 1])
plt.show()

### Plotting the accuracy of MLP Model vs CNN Model

In [None]:
# Pull the per-epoch accuracy curves of both models out of their histories.
cnn_train, cnn_val = (cnn_history.history[key] for key in ("acc", "val_acc"))
mlp_train, mlp_val = (mlp_history.history[key] for key in ("acc", "val_acc"))

fig, axes = plt.subplots(1, 2, figsize=(35, 10))

# One panel per data split, each overlaying the CNN and MLP curves.
comparison_panels = (
    (axes[0], "Training Accuracy", cnn_train, mlp_train),
    (axes[1], "Validation Accuracy", cnn_val, mlp_val),
)
for panel, panel_title, cnn_curve, mlp_curve in comparison_panels:
    panel.plot(cnn_curve, label="CNN Model")
    panel.plot(mlp_curve, label="MLP Model")
    panel.set_title(panel_title)
    panel.legend()
    panel.set_xlabel("Epochs")

# Only the validation panel is pinned to the full [0, 1] accuracy range.
axes[1].set_ylim([0, 1])
plt.show()