## MLP model for Fashion MNIST dataset using Keras
        * Author: @Sameer Kesava
                         
                         * Just one  hidden layer with 128 units and tf.nn.relu activation function
                         
                         * Batch size = 64
                         
                         * Adam Optimizer and default learning rate
                         
                         * Training accuracy: 95.1% 
                         
                         * Test accuracy 88.6% indicating overfitting of training data
    

In [1]:
from __future__ import division, print_function

import tensorflow_datasets as tfds
import tensorflow as tf


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [0]:
tf.logging.set_verbosity(tf.logging.ERROR)

In [0]:
from tensorflow.keras.datasets import fashion_mnist

In [4]:
dataset = fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [6]:
len(dataset)

2

In [7]:
len(dataset[0])

2

In [8]:
len(dataset[0][0])

60000

In [9]:
x_train = dataset[0][0]
y_train = dataset[0][1]
print(x_train.shape, y_train.shape)

(60000, 28, 28) (60000,)


In [10]:
x_test = dataset[1][0]
y_test = dataset[1][1]
print(x_test.shape, y_test.shape)

(10000, 28, 28) (10000,)


Scaling

In [0]:
import numpy as np

In [13]:
mean_pix = np.mean(x_train)
mean_pix

72.94035223214286

In [14]:
std_pix = np.std(x_train)
std_pix

90.02118235130519

In [15]:
x_train = x_train.reshape(-1, 784)
x_train.shape

(60000, 784)

In [19]:
x_test = x_test.reshape(-1, 784)
x_test.shape

(10000, 784)

In [17]:
mean_pix = np.mean(x_train, axis = 0)
mean_pix.shape

(784,)

In [18]:
std_pix_col = np.std(x_train, axis = 0)
print(np.min(std_pix_col))

0.09255283175929792


In [0]:
x_train_scaled =  (x_train - mean_pix)/std_pix
x_test_scaled = (x_test - mean_pix)/std_pix

In [25]:
print('min and max scaled values: {} {}'.format(np.min(x_train_scaled),np.max(x_train_scaled)))

min and max scaled values: -1.7982032573357591 2.8317651839451097


In [26]:
y_unique = np.unique(y_train)
y_unique

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

One-hot encoding y values

In [0]:
from keras.utils import to_categorical

In [28]:
y_onehot = to_categorical(y_train)
y_onehot.shape

(60000, 10)

In [61]:
y_test_onehot = to_categorical(y_test)
y_test_onehot.shape

(10000, 10)

Creating a validation dataset. Can shuffle before splitting

In [0]:
x_valid_scaled, x_train_scaled = x_train_scaled[:10000], x_train_scaled[10000:]
y_valid_onehot, y_train_onehot = y_onehot[:10000], y_onehot[10000:]

In [30]:
y_train_onehot.shape

(50000, 10)

In [46]:
x_train_scaled.shape

(50000, 784)

Building a NN

In [0]:
model = tf.keras.Sequential()

In [0]:
batch_size = 64

In [0]:
model.add(tf.keras.layers.Dense(units = 128, input_shape = [784], activation=tf.nn.relu))

In [0]:
model.add(tf.keras.layers.Dense(units = 10, activation=tf.nn.softmax))

In [0]:
model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['accuracy'])

In [60]:
model.fit(x = x_train_scaled, y = y_train_onehot, validation_data=(x_valid_scaled, y_valid_onehot), batch_size=batch_size,
         epochs = 20, verbose = 1, shuffle=True)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f2e517c6ba8>

Testing on test data

In [63]:
test_loss, test_accuracy =  model.evaluate(x = x_test_scaled, y = y_test_onehot, batch_size=batch_size)
print(test_loss, test_accuracy)

0.018156316655874253 0.8862


In [64]:
predictions =  model.predict(x = x_test_scaled)
predictions.shape

(10000, 10)

In [67]:
predicted_labels = np.argmax(predictions, axis=1)
predicted_labels.shape

(10000,)

In [68]:
predicted_labels[:10]

array([9, 2, 1, 1, 6, 1, 4, 6, 5, 7])

In [69]:
y_test[:10]

array([9, 2, 1, 1, 6, 1, 4, 6, 5, 7], dtype=uint8)