## CNN model for Fashion MNIST dataset using Keras
* Author: @Sameer Kesava
* 2 convolutional layers and 2 dense layers
* Batch size = 64
* Adam optimizer with learning rate = 0.001
* Dropout probability: 0.5
* Epochs = 20
* Training accuracy: 90.0%
* Test accuracy: 90.1%, close to the training accuracy, indicating neither overfitting nor underfitting of the training data
    

In [1]:
from __future__ import division, print_function

import tensorflow_datasets as tfds
import tensorflow as tf


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [0]:
# Only show TensorFlow log messages of ERROR severity or higher (TF 1.x tf.logging API).
tf.logging.set_verbosity(tf.logging.ERROR)

In [0]:
from tensorflow.keras.datasets import fashion_mnist

In [4]:
# Load Fashion-MNIST through Keras (the archives are downloaded on first use).
# The result is indexed below as dataset[0] = (train images, train labels)
# and dataset[1] = (test images, test labels).
dataset = fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [5]:
# Number of splits returned by load_data() (train and test).
len(dataset)

2

In [6]:
# The training split holds two arrays: images and labels.
len(dataset[0])

2

In [7]:
# Number of training images.
len(dataset[0][0])

60000

In [8]:
# Training split: unpack the (images, labels) pair in one step.
x_train, y_train = dataset[0]
print(x_train.shape, y_train.shape)

(60000, 28, 28) (60000,)


In [9]:
# Test split: unpack the (images, labels) pair in one step.
x_test, y_test = dataset[1]
print(x_test.shape, y_test.shape)

(10000, 28, 28) (10000,)


## Scaling

In [0]:
import numpy as np

In [11]:
# Standard deviation over every pixel of every training image (a single scalar).
# It is the common scale factor used by the scaling cell further down.
std_pix = np.std(x_train)
std_pix

90.02118235130519

In [12]:
# Flatten each 28x28 training image into a 784-long row so that per-pixel
# statistics can be taken along axis 0.
x_train = x_train.reshape(-1, 784)
x_train.shape

(60000, 784)

In [13]:
# Flatten the test images the same way as the training images.
x_test = x_test.reshape(-1, 784)
x_test.shape

(10000, 784)

In [14]:
# Per-pixel mean over the training images only (one value per pixel position).
# The same training mean is later subtracted from the test images as well.
mean_pix = np.mean(x_train, axis = 0)
mean_pix.shape

(784,)

In [15]:
# Per-pixel standard deviation over the training images; the minimum shows how
# little some pixel positions vary. The scaling cell below divides by the
# global std_pix rather than by this per-pixel value.
std_pix_col = np.std(x_train, axis = 0)
print(np.min(std_pix_col))

0.09255283175929792


In [0]:
# Centre every pixel with its training-set mean (mean_pix) and divide by the
# single global standard deviation (std_pix). The test images are transformed
# with the training statistics, not their own.
x_train_scaled =  (x_train - mean_pix)/std_pix
x_test_scaled = (x_test - mean_pix)/std_pix

In [17]:
# Sanity check: range of the scaled training pixels.
print('min and max scaled values: {} {}'.format(np.min(x_train_scaled),np.max(x_train_scaled)))

min and max scaled values: -1.7982032573357591 2.8317651839451097


#### Reshaping

In [0]:
# Restore the 28x28 image layout and add a trailing channel axis of size 1,
# giving (N, 28, 28, 1) to match the input_shape of the first Conv2D layer.
x_train_scaled = x_train_scaled.reshape(-1, 28, 28, 1)
x_test_scaled = x_test_scaled.reshape(-1, 28, 28, 1)

In [19]:
# Confirm the reshaped training array.
x_train_scaled.shape

(60000, 28, 28, 1)

## One-hot encoding y values

In [20]:
# Distinct class labels present in the training set.
y_unique = np.unique(y_train)
y_unique

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [21]:
from keras.utils import to_categorical

Using TensorFlow backend.


In [22]:
# One-hot encode the training labels: one row per sample, one column per class.
# No num_classes is given, so the width is derived from the labels themselves.
y_onehot = to_categorical(y_train)
y_onehot.shape

(60000, 10)

In [23]:
# One-hot encode the test labels. The width is pinned to that of the training
# encoding (y_onehot) so both arrays always have the same number of class
# columns, even if the highest class id were missing from the test labels.
y_test_onehot = to_categorical(y_test, num_classes = y_onehot.shape[1])
y_test_onehot.shape

(10000, 10)

## Creating a validation dataset from the first 10,000 training samples (the data could be shuffled before splitting)

In [0]:
# Hold out the first VALID_SIZE training samples for validation.
#
# x_train_scaled is rebound to the remaining samples, so the image split is
# guarded: it only runs while x_train_scaled still has as many rows as
# y_onehot, i.e. while it has not been split yet. Re-running this cell
# therefore does not shrink the training set a second time or desynchronise
# images and labels.
VALID_SIZE = 10000

if len(x_train_scaled) == len(y_onehot):
    x_valid_scaled, x_train_scaled = x_train_scaled[:VALID_SIZE], x_train_scaled[VALID_SIZE:]

# y_onehot itself is never overwritten, so the label split is always safe to redo.
y_valid_onehot, y_train_onehot = y_onehot[:VALID_SIZE], y_onehot[VALID_SIZE:]

In [25]:
# Training labels left after the validation hold-out.
y_train_onehot.shape

(50000, 10)

In [26]:
# Training images left after the validation hold-out (row count should match the labels).
x_train_scaled.shape

(50000, 28, 28, 1)

## Building a CNN

In [0]:
# CNN: two strided convolution + pooling stages followed by a dense head.
# The layers are appended one at a time, in the same order as before.
model = tf.keras.Sequential()

# Stage 1: 32 filters of 3x3 with stride 2, then 2x2 max pooling.
# With 'same' padding the spatial size goes 28 -> 14 -> 7.
model.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = (3,3), strides = (2,2), padding = 'same',
                                 activation = tf.nn.relu, input_shape = (28,28,1)))
model.add(tf.keras.layers.MaxPooling2D(pool_size = (2,2), strides = (2,2), padding = 'same'))

# Stage 2: 64 filters of 3x3 with stride 2, then 2x2 average pooling (7 -> 4 -> 2).
model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = (3,3), strides = (2,2), padding = 'same',
                                 activation = tf.nn.relu))
model.add(tf.keras.layers.AveragePooling2D(pool_size = (2,2), strides = (2,2), padding = 'same'))

# Head: flatten, apply dropout to the flattened features, then two dense layers.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dropout(rate = 0.5))
model.add(tf.keras.layers.Dense(units = 512, activation = tf.nn.relu))

# No activation on the output layer: the model emits one raw score (logit) per class.
model.add(tf.keras.layers.Dense(units = 10, activation = None))

In [0]:
# Adam optimizer with learning rate 0.001. The loss is softmax cross-entropy,
# which is given the one-hot labels and the raw scores from the final
# activation-free Dense layer. Accuracy is tracked as an extra metric.
model.compile(
    optimizer = tf.keras.optimizers.Adam(lr = 0.001),
    loss = tf.losses.softmax_cross_entropy,
    metrics = ['accuracy'],
)

In [33]:
# Train for 20 epochs with mini-batches of 64, reshuffling the training data
# every epoch and reporting validation metrics after each one.
#
# validation_data is passed as an (inputs, targets) tuple, which is the form
# the Keras fit() documentation specifies.
# The returned History object is kept so the per-epoch loss and accuracy
# (history.history) can be inspected or plotted after training.
history = model.fit(x = x_train_scaled, y = y_train_onehot, batch_size = 64, epochs = 20, verbose = 1,
                    validation_data = (x_valid_scaled, y_valid_onehot), shuffle = True)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f0a48a60fd0>

## Testing on test data

In [34]:
# Evaluate on the held-out test set. Two values come back: the loss and the
# 'accuracy' metric requested in compile().
test_loss, test_accuracy =  model.evaluate(x = x_test_scaled, y = y_test_onehot, batch_size=64)
print(test_loss, test_accuracy)

0.2720232246160507 0.9006


In [61]:
# Model outputs for every test image: one row of 10 class scores each.
# These are raw scores, because the final Dense layer has no activation.
predictions =  model.predict(x = x_test_scaled)
predictions.shape

(10000, 10)

In [62]:
# Predicted class = index of the largest score in each row.
predicted_labels = np.argmax(predictions, axis=1)
predicted_labels.shape

(10000,)

In [63]:
# First ten predicted labels, to compare with the true labels in the next cell.
predicted_labels[:10]

array([9, 2, 1, 1, 6, 1, 4, 6, 5, 7])

In [64]:
# First ten true test labels.
y_test[:10]

array([9, 2, 1, 1, 6, 1, 4, 6, 5, 7], dtype=uint8)