# Classifier for Fashion MNIST
### Implementing CNN models to classify Fashion MNIST and compare their performances
##### Download data from kaggle.com/zalando-research/fashionmnist/version/4
##### Unzip, put .csv files into the same folder as the code

In [1]:
#import useful modules here

import tensorflow
import keras
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [3]:
#read the test and train CSV files into arrays

from load_data import load_csv_as_array
test_data = load_csv_as_array("fashion-mnist_test.csv")
train_data = load_csv_as_array("fashion-mnist_train.csv")

#Column 0 of each array holds the label; the remaining columns hold the pixels.
#NOTE(review): the CSV header row is not dropped anywhere in this cell, so
#load_csv_as_array is presumably expected to skip it - confirm.

#labels (column 0)
testY, Y = test_data[:, 0], train_data[:, 0]

#pixel data, divided by 255 to scale it between 0-1
#(assumes 8-bit pixel values - confirm against the CSV)
testX, X = test_data[:, 1:] / 255, train_data[:, 1:] / 255

#Hold out 20% of X/Y as the validation set; the seed is fixed so the split
#is the same on every run

trainX, validX, trainY, validY = train_test_split(
    X, Y,
    random_state=42,
    test_size=0.2,
)

#Report the size of each set and the number of features per example
print("There are these many examples in test set:", len(testY))
print("There are these many examples in train set:", len(trainY))
print("There are these many examples in validation set:", len(validY))
print("There are these many pixels per example:", testX.shape[-1])

There are these many examples in test set: 10000
There are these many examples in train set: 48000
There are these many examples in validation set: 12000
There are these many pixels per example: 784


In [4]:
#Turn each flat 784-pixel row into a 28x28 single-channel image.
#The leading dimension (number of examples) is kept as it is.
trainX = trainX.reshape(len(trainX), 28, 28, 1)
testX = testX.reshape(len(testX), 28, 28, 1)
validX = validX.reshape(len(validX), 28, 28, 1)

In [5]:
#Baseline CNN
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
from keras.regularizers import l2
from keras.optimizers import Adam

#Baseline architecture: two strided 3x3 convolutions followed by three
#dense layers, passed to Sequential as an ordered list of layers
simpCNNModel = Sequential([
    Conv2D(28, (3, 3), strides=(2, 2), activation='relu',
           input_shape=(28, 28, 1)),
    Conv2D(14, (3, 3), strides=(2, 2), activation='relu'),
    Flatten(),
    #l2(0) has a zero penalty factor, so this layer is effectively unregularized
    Dense(128, activation='softplus', kernel_regularizer=l2(0)),
    Dense(32, activation='softplus'),
    #10 softmax outputs, one per class label
    Dense(10, activation='softmax'),
])

#Compile with Adam; the labels are integer class ids, hence the sparse loss

optimizer = Adam(lr=.0001, decay=5e-5)

simpCNNModel.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)


In [6]:
#fit and evaluate

#summary() prints the layer table itself and returns None, so it is called
#directly; wrapping it in print() would emit an extra "None" line.
simpCNNModel.summary()

#Train for 15 epochs in batches of 512 examples; the held-out validation
#set is passed so Keras reports validation metrics alongside training ones.
simpCNNModel.fit(trainX, trainY,
            batch_size=512, epochs=15, verbose = 1,
            validation_data=(validX, validY)
            )

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 13, 13, 28)        280       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 6, 6, 14)          3542      
_________________________________________________________________
flatten_1 (Flatten)          (None, 504)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               64640     
_________________________________________________________________
dense_2 (Dense)              (None, 32)                4128      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                330       
Total params: 72,920
Trainable params: 72,920
Non-trainable params: 0
_________________________________________________________________
None
T

<keras.callbacks.History at 0x279a1cc5e10>

In [7]:
#Try to improve on the baseline
#Let's try adding a maxpool layer

#Import from the public keras.layers namespace (the same one used for
#Conv2D/Dense/Flatten) rather than the keras.layers.convolutional submodule,
#whose layout is not stable across Keras releases.
from keras.layers import MaxPooling2D

#construct the CNN model: same layers as the baseline, plus a 2x2 max-pool
#after the second convolution
CNNModel = Sequential()
CNNModel.add(Conv2D(28, (3, 3), strides=(2,2), activation='relu',
            input_shape = (28,28,1)))
CNNModel.add(Conv2D(14, (3, 3), strides=(2,2), activation='relu'))
CNNModel.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

CNNModel.add(Flatten())
#l2(0) has a zero penalty factor, so no regularization is applied here
CNNModel.add(Dense(128, activation='softplus', kernel_regularizer=l2(0)))
CNNModel.add(Dense(32, activation='softplus'))
CNNModel.add(Dense(10, activation='softmax'))

#compile the model (same optimizer settings and loss as the baseline)

optimizer = Adam(lr = .0001, decay = 5e-5)

CNNModel.compile(optimizer=optimizer,
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])

#fit and evaluate (same batch size, epochs and validation set as the baseline)
CNNModel.fit(trainX, trainY,
            batch_size=512, epochs=15, verbose = 1,
            validation_data=(validX, validY)
            )

Train on 48000 samples, validate on 12000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x279a725d128>

In [8]:
#That doesn't really change much (~4% isn't that much of a difference)
#Let's try adding batch normalization

#Import from the public keras.layers namespace: the
#keras.layers.normalization submodule path is not stable across Keras
#releases and raises ImportError on later versions. MaxPooling2D is imported
#here as well so this cell does not depend on an earlier cell's import.
from keras.layers import BatchNormalization, MaxPooling2D

#construct the CNN model: batch normalization after each convolution and
#after the first dense layer
#NOTE: this re-binds CNNModel, replacing any model previously stored under
#that name
CNNModel = Sequential()
CNNModel.add(Conv2D(28, (3, 3), strides=(2,2), activation='relu',
            input_shape = (28,28,1)))
CNNModel.add(BatchNormalization())
CNNModel.add(Conv2D(14, (3, 3), strides=(2,2), activation='relu'))
CNNModel.add(BatchNormalization())
CNNModel.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

CNNModel.add(Flatten())
#add regularization here: L2 penalty with factor 0.1 on this layer's kernel
CNNModel.add(Dense(128, activation='softplus', kernel_regularizer=l2(0.1)))
CNNModel.add(BatchNormalization())
CNNModel.add(Dense(32, activation='softplus'))
CNNModel.add(Dense(10, activation='softmax'))

#compile the model (same optimizer settings and loss as the baseline)

optimizer = Adam(lr = .0001, decay = 5e-5)

CNNModel.compile(optimizer=optimizer,
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])

#fit and evaluate (same batch size, epochs and validation set as the baseline)
CNNModel.fit(trainX, trainY,
            batch_size=512, epochs=15, verbose = 1,
            validation_data=(validX, validY)
            )

Train on 48000 samples, validate on 12000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x279a8108d68>

In [10]:
#CNNModel was the best performer on the validation data, so score it once
#on the held-out test set.
#NOTE(review): CNNModel is whichever model was last bound to that name
#(the batch-normalization variant if the cells are run in order) - confirm.

loss, accuracy = CNNModel.evaluate(x=testX, y=testY)

#evaluate() returns the loss followed by the compiled metric (accuracy)
print('test loss:', loss)
print('test acc:', accuracy)

test acc: 0.8507


A further direction would be to use the validation results to tune hyperparameters: the learning rate and the filter sizes could both be tuned this way. Rather than relying on accuracy alone, we could also use something like t-SNE to guide the hyperparameter tuning.