<a href="https://colab.research.google.com/github/jvdowd/msds462/blob/master/msds_462_module1_fashion_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Update keras if needed to accommodate requirements in packages used
# !pip install -U keras


In [1]:
# Import relevant packages:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Import Model-Related Packages
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Import sklearn tools for model selection and evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Helper libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Show plots in the notebook
%matplotlib inline

# Check version of tensorflow
print(tf.__version__)

# Google Drive packages for saving model for future use
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive 
from google.colab import auth 
from oauth2client.client import GoogleCredentials


2.2.0-rc2


Using TensorFlow backend.
  import pandas.util.testing as tm


In [2]:
# Fetch Fashion-MNIST through the Keras datasets API; it arrives already split
# into a training set and a test set.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Image geometry used by the first convolution layer; the trailing 1 is the
# single channel axis added by the reshape further down.
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

# One-hot encode the integer labels so they match the softmax output layer
y_train, y_test = to_categorical(train_labels), to_categorical(test_labels)

# Sanity check: raw label vector shape next to its one-hot encoded shape
print(train_labels.shape, y_train.shape)
print(test_labels.shape, y_test.shape)

# Hold out 20% of the training data as a validation set that is monitored
# during training; the fixed random_state keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    train_images,
    y_train,
    test_size=0.2,
    random_state=573,
)

# For each split: add the channel axis, cast to float32, and rescale the pixel
# values by 1/255 so they lie between 0 and 1.
X_train = X_train.reshape(-1, img_rows, img_cols, 1).astype('float32') / 255
X_test = test_images.reshape(-1, img_rows, img_cols, 1).astype('float32') / 255
X_val = X_val.reshape(-1, img_rows, img_cols, 1).astype('float32') / 255

(60000,) (60000, 10)
(10000,) (10000, 10)


In [0]:
# Human-readable class names; the position in the list is the integer label
# it is paired with when used as `target_names` / tick labels below.
class_names = [
    'T-shirt/top',  # 0
    'Trouser',      # 1
    'Pullover',     # 2
    'Dress',        # 3
    'Coat',         # 4
    'Sandal',       # 5
    'Shirt',        # 6
    'Sneaker',      # 7
    'Bag',          # 8
    'Ankle boot',   # 9
]

In [4]:
# Define settings for training process
batch_size = 256   # samples per gradient update
num_classes = 10   # one output unit per Fashion-MNIST class
epochs = 50        # upper bound on epochs passed to model.fit

# Define Sequential Model for CNN
model = Sequential()

# First, add a convolutional layer, pooling and dropout.
# input_shape is only required on the first layer.
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 kernel_initializer='he_normal',
                 input_shape=input_shape))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))

# Next, add second convolutional layer, pooling and dropout
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Third convolutional layer with dropout (no pooling after this one)
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))

# Flatten the feature maps into a vector for the dense layers
model.add(Flatten())

# One dense hidden layer with dropout
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))

# Add output layer with appropriate number of classes and softmax activation to give a probability of each class
model.add(Dense(num_classes, activation='softmax'))

# Compile the model.
# The loss and optimizer are given as string identifiers so they are resolved
# by the same Keras package that provides `Sequential`. The notebook imports
# the layers/model from `keras` while the bare name `keras` refers to
# `tensorflow.keras`; passing tf.keras objects to a standalone-Keras model
# mixes the two packages and produces a "TensorFlow optimizers do not ..."
# warning during training. 'adam' builds an Adam optimizer with its default
# settings, the same as the previous `Adam()` call with no arguments.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Summarize the CNN architecture
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 3, 3, 128)        

In [0]:
%%timeit

# Set callback functions to early stop training and save the best model so far
callbacks = [EarlyStopping(monitor='val_loss', patience=2),
             ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)]

# Train the model using selected parameters

history = model.fit(
                    X_train
                    , y_train
                    , batch_size=batch_size
                    , epochs=epochs
                    , verbose=1
                    , validation_data=(X_val, y_val)
                    , callbacks = callbacks
                    )


Train on 48000 samples, validate on 12000 samples
Epoch 1/50
Epoch 2/50


  'TensorFlow optimizers do not '


Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
10752/48000 [=====>........................] - ETA: 31s - loss: 0.3139 - accuracy: 0.8848

In [0]:
# Evaluate the trained model on the held-out test set without progress output;
# `score` holds the loss followed by the compiled metric(s).
score = model.evaluate(x=X_test, y=y_test, verbose=0)

In [0]:
# Report the test-set loss and accuracy returned by model.evaluate
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [0]:
# Get predicted classes for the test data: the predicted class of each image is
# the index of its largest softmax probability. This replaces
# `model.predict_classes`, which is deprecated and no longer available in
# recent TensorFlow/Keras releases.
predicted_classes = np.argmax(model.predict(X_test), axis=-1)

# Create classification report (precision / recall / F1 per class) for the test
# data, comparing the integer test labels with the predicted class indices.
print(classification_report(test_labels, predicted_classes, target_names=class_names))

In [0]:
# Confusion matrix of true vs. predicted integer labels on the test set
cm = confusion_matrix(test_labels, predicted_classes)

# Single-axes figure that hosts the seaborn heatmap
f, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(cm, annot=True, fmt="d", cmap='Blues', ax=ax)  # annot=True writes the count in each cell

# Axis labels, title, and class names on the ticks
ax.set(xlabel='Predicted labels', ylabel='True labels', title='Confusion Matrix')
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names);


In [0]:
# accuracy = history.history['accuracy']
# val_accuracy = history.history['val_accuracy']

# loss = history.history['loss']
# val_loss = history.history['val_loss']

# epochs = range(len(accuracy))

# plt.plot(epochs, accuracy, 'bo', label='Training accuracy')
# plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
# plt.title('Training and validation accuracy')
# plt.legend()
# plt.figure()
# plt.plot(epochs, loss, 'bo', label='Training loss')
# plt.plot(epochs, val_loss, 'b', label='Validation loss')
# plt.title('Training and validation loss')
# plt.legend()
# plt.show()

In [0]:
# Save model to Google Drive
# auth.authenticate_user()
# gauth = GoogleAuth()
# gauth.credentials = GoogleCredentials.get_application_default()                       
# drive = GoogleDrive(gauth)

# # serialize model to JSON
# model_json = model.to_json()
# with open("model.json", "w") as json_file:
#     json_file.write(model_json)

# model_file = drive.CreateFile({'title' : 'model.json'})                       
# model_file.SetContentFile('model.json')                       
# model_file.Upload()

# # download to google drive                       
# drive.CreateFile({'id': model_file.get('id')})


In [0]:
# Save weights to Google Drive
# # serialize weights to HDF5
# model.save_weights('model.h5')

# model_weights = drive.CreateFile({'title' : 'model.h5'})                       
# model_weights.SetContentFile('model.h5')                       
# model_weights.Upload()

# # download to google drive                       
# drive.CreateFile({'id': model_weights.get('id')})

# **References**
Some helpful resources I found when assembling and troubleshooting my code:

*  https://www.tensorflow.org/tutorials/keras/classification

*  https://www.kaggle.com/bugraokcu/cnn-with-keras/notebook
*  https://machinelearningmastery.com/loss-and-loss-functions-for-training-deep-learning-neural-networks/
*  https://medium.com/technologymadeeasy/the-best-explanation-of-convolutional-neural-networks-on-the-internet-fbb8b1ad5df8
*   https://medium.com/@amarbudhiraja/https-medium-com-amarbudhiraja-learning-less-to-learn-better-dropout-in-deep-machine-learning-74334da4bfc5
*   https://machinelearningmastery.com/how-to-stop-training-deep-neural-networks-at-the-right-time-using-early-stopping/











