In [12]:
#!pip install tensorflow==2.0.0-rc1 --quiet

import warnings
warnings.filterwarnings("ignore")

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.optimizers import Adam, SGD

# Data Cleaning

In [51]:
(X_train_raw, y_train), (X_test_raw, y_test) = fashion_mnist.load_data()

In [52]:
X_train.shape

(60000, 784)

In [53]:
# scale
X_train = X_train / 255.0
X_test = X_test / 255.0

# reshape to use in Dense layers
X_train = X_train_raw.reshape(X_train_raw.shape[0], 28*28)
X_test = X_test_raw.reshape(X_test_raw.shape[0], 28*28)

In [54]:
X_test.shape

(10000, 784)

In [55]:
# convert labels to numeric
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

In [56]:
X_train_conv = X_train_raw.reshape(X_train_raw.shape[0],X_train_raw.shape[1],X_train_raw.shape[1],1)
X_test_conv = X_test_raw.reshape(X_test_raw.shape[0],X_test_raw.shape[1],X_test_raw.shape[1],1)


In [57]:
X_test_conv.shape

(10000, 28, 28, 1)

In [26]:
y_train

array([[0., 0., 0., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

# Building the Models

## Model 1: 5 layers, Adam

In [40]:
model_1 = Sequential()
model_1.add(Dense(128, input_dim = X_train.shape[1], activation = 'relu'))
model_1.add(Dense(64, activation = 'relu'))
model_1.add(Dense(64, activation = 'relu'))
model_1.add(Dense(32, activation = 'relu'))
model_1.add(Dense(10, activation = 'softmax'))

In [41]:
model_1.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

model_1.fit(X_train, y_train, validation_data = (X_test, y_test), 
         epochs = 100, batch_size = 200, verbose = 1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100


Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1903b7a83c8>

Train Loss: 0.1809 <br>
Train Accuracy: 0.9344 <br>
Test Loss: 0.3676 <br>
Test Accuracy: 0.8832

## Model 2: 5 layers, RMSprop

In [42]:
model_2 = Sequential()
model_2.add(Dense(128, input_dim = X_train.shape[1], activation = 'relu'))
model_2.add(Dense(64, activation = 'relu'))
model_2.add(Dense(64, activation = 'relu'))
model_2.add(Dense(32, activation = 'relu'))
model_2.add(Dense(10, activation = 'softmax'))

In [43]:
model_2.compile(loss = 'categorical_crossentropy', optimizer = 'RMSprop', metrics = ['accuracy'])

model_2.fit(X_train, y_train, validation_data = (X_test, y_test), 
         epochs = 100, batch_size = 200, verbose = 1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100


Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1903bedcb88>

Train Loss: 0.1684<br>
Train Accuracy: 0.9373 <br>
Test Loss: 0.3622 <br>
Test Accuracy: 0.8868 <br>

Using RMSprop as the optimizer very slightly improves accuracy and loss.

## Model 3: 7 layers, RMSprop

In [46]:
model_3 = Sequential()
model_3.add(Dense(128, input_dim = X_train.shape[1], activation = 'relu'))
model_3.add(Dense(128, activation = 'relu'))
model_3.add(Dense(64, activation = 'relu'))
model_3.add(Dense(64, activation = 'relu'))
model_3.add(Dense(32, activation = 'relu'))
model_3.add(Dense(32, activation = 'relu'))
model_3.add(Dense(10, activation = 'softmax'))

In [47]:
model_3.compile(loss = 'categorical_crossentropy', optimizer = 'RMSprop', metrics = ['accuracy'])

model_3.fit(X_train, y_train, validation_data = (X_test, y_test), 
         epochs = 100, batch_size = 200, verbose = 1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100


Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1903fb8de08>

Train Loss: 0.1571 <br>
Train Accuracy: 0.9407 <br>
Test Loss: 0.4407 <br>
Test Accuracy: 0.8878 <br>

Increasing the number of layers increased accuracy and decreased loss very slightly. It may not be worth the extra computation for such little increase in performance.

## Convolutional Model

In [49]:
model_conv = Sequential()
model_conv.add(Conv2D(128, input_shape = (28, 28,1), kernel_size = (3,3), activation = 'relu'))
model_conv.add(Conv2D(128, kernel_size = (3,3), activation = 'relu'))
model_conv.add(Flatten())
model_conv.add(Dense(64, activation = 'relu'))
model_conv.add(Dense(32, activation = 'relu'))
model_conv.add(Dense(10, activation ='softmax'))

In [None]:
model_conv.compile(loss = 'categorical_crossentropy', optimizer = 'sgd', metrics = ['accuracy'])

model_conv.fit(X_train_conv, y_train, validation_data = (X_test_conv, y_test), 
         epochs = 20, batch_size = 100, verbose = 1)

# I only use 20 epochs to cut down on computing time

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
 6800/60000 [==>...........................] - ETA: 7:03 - loss: nan - accuracy: 0.0996