<a href="https://colab.research.google.com/github/saurav2sengupta/learnML/blob/master/fashion_mnist_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Import basic Python libraries
import numpy as np
import pandas as pd
import time
import random
import matplotlib.pyplot as plt
 
%matplotlib inline

In [2]:
from keras.datasets import fashion_mnist

# Fetch Fashion-MNIST through the Keras dataset helper and unpack the result
# into the train pair and the test pair.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Report the shape of every array so the split sizes can be checked at a glance.
for message, split in (
    ("Shape of x_train {}", x_train),
    ("Shape of y_train {}", y_train),
    ("Shape of x_test {}", x_test),
    ("Shape of y_test {}", y_test),
):
    print(message.format(split.shape))


Using TensorFlow backend.


Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Shape of x_train (60000, 28, 28)
Shape of y_train (60000,)
Shape of x_test (10000, 28, 28)
Shape of y_test (10000,)


In [3]:
# Human-readable Fashion-MNIST class names, one entry per line.
# NOTE(review): presumably position i names integer class label i in
# y_train / y_test; none of the visible cells index into this list -- confirm.
labelNames = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

In [4]:
# Normalize the data: convert the pixel arrays to float32 and scale them by 1/255.
#
# The integer-dtype guard keeps this cell idempotent. Each array is rebound to
# its scaled float32 copy, so without the guard a second execution of the cell
# would divide the already-scaled values by 255 again.
# NOTE(review): assumes load_data() yields integer-typed pixels (Keras documents
# uint8) -- confirm if the data source ever changes.
if np.issubdtype(x_train.dtype, np.integer):
    x_train = x_train.astype('float32') / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test.astype('float32') / 255.0

In [5]:
# Flatten every image into a single feature row so both arrays become 2-D
# (one row per sample), which is the layout the scikit-learn models are fit on.
#
# The row length is passed as -1 so NumPy infers it from the remaining
# dimensions. On the original 3-D arrays this gives the same result as
# multiplying shape[1] * shape[2]; on arrays that are already flattened it is a
# no-op, so re-running this cell no longer fails on the missing shape[2].
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

print(x_train.shape)
print(x_test.shape)

(60000, 784)
(10000, 784)


In [6]:
#Import classic ML models
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [7]:
# Fit the classic ML baselines on the flattened training images and record how
# long each fit() call takes (wall-clock seconds, reported below in minutes).

# Seed handed to the stochastic estimators (random forest, decision tree) so a
# fresh run of this cell rebuilds the same models and the scores computed from
# them in later cells are repeatable.
RANDOM_STATE = 42

# SVM Model -- disabled, kept for reference.
# NOTE(review): presumably switched off because of its training time -- confirm.
# start1 = time.time()
# svc = SVC(C=1, kernel='linear', gamma="auto")
# svc.fit(x_train, y_train)
# end1 = time.time()
# svm_time = end1 - start1

# KNN Model
# Only fit() is timed here; prediction on x_test happens in a later cell and is
# not part of knn_time.
start2 = time.time()
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)
#y_pred_knn = knn.predict(x_test)
end2 = time.time()
knn_time = end2 - start2

# Random Forest
start3 = time.time()
random_forest = RandomForestClassifier(criterion='entropy', max_depth=70,
                                       n_estimators=100,
                                       random_state=RANDOM_STATE)
random_forest.fit(x_train, y_train)
end3 = time.time()
forest_time = end3 - start3

# Decision Tree
start4 = time.time()
tree = DecisionTreeClassifier(max_depth=100, criterion='entropy',
                              random_state=RANDOM_STATE)
tree.fit(x_train, y_train)
end4 = time.time()
tree_time = end4 - start4

# Elapsed times are stored in seconds; divide by 60 to report minutes.
#print("SVM Time: {:0.2f} minute".format(svm_time/60.0))
print("KNN Time: {:0.2f} minute".format(knn_time/60.0))
print("Random Forest Time: {:0.2f} minute".format(forest_time/60.0))
print("Decision Tree Time: {:0.2f} minute".format(tree_time/60.0))

KNN Time: 0.20 minute
Random Forest Time: 2.10 minute
Decision Tree Time: 0.76 minute


In [9]:
#Check the accuracy for each of the above ML models
from sklearn import metrics

# Evaluate the fitted KNN classifier on the test images.
y_pred_knn = knn.predict(x_test)

# Accuracy and weighted-average F1 of the predictions against the test labels.
knn_accuracy = metrics.accuracy_score(y_test, y_pred_knn)
knn_f1 = metrics.f1_score(y_test, y_pred_knn, average="weighted")
# Confusion matrix is currently disabled:
# knn_cm = metrics.confusion_matrix(y_test, y_pred_knn)

In [11]:
# Print the KNN summary: a banner line followed by the two scores, one per line.
print(
    "-----------------K-nearest neighbors Report---------------",
    "F1 score: {}".format(knn_f1),
    "Accuracy score: {}".format(knn_accuracy),
    sep="\n",
)

-----------------K-nearest neighbors Report---------------
F1 score: 0.8546439722018904
Accuracy score: 0.8554


In [12]:
# Evaluate the fitted random forest on the test images.
y_pred_forest = random_forest.predict(x_test)

# Accuracy and weighted-average F1 of the predictions against the test labels.
random_forest_accuracy = metrics.accuracy_score(y_test, y_pred_forest)
random_forest_f1 = metrics.f1_score(y_test, y_pred_forest, average="weighted")

In [13]:
# Print the random forest summary: a banner line followed by the two scores.
print(
    "-----------------Random Forest Report---------------",
    "F1 score: {}".format(random_forest_f1),
    "Accuracy score: {}".format(random_forest_accuracy),
    sep="\n",
)

-----------------Random Forest Report---------------
F1 score: 0.8760242660853803
Accuracy score: 0.8776


In [14]:
# Evaluate the fitted decision tree on the test images.
y_pred_tree = tree.predict(x_test)

# Accuracy and weighted-average F1 of the predictions against the test labels.
tree_accuracy = metrics.accuracy_score(y_test, y_pred_tree)
tree_f1 = metrics.f1_score(y_test, y_pred_tree, average="weighted")

# Print the decision tree summary: a banner line followed by the two scores.
print(
    "-----------------Decision Tree Report---------------",
    "F1 score: {}".format(tree_f1),
    "Accuracy score: {}".format(tree_accuracy),
    sep="\n",
)

-----------------Decision Tree Report---------------
F1 score: 0.8014683836996909
Accuracy score: 0.8007
