In [1]:
# Sanity check: list the GPU devices visible to the Keras TensorFlow backend.
from keras import backend as K

# _get_available_gpus is a private helper of the TensorFlow backend module.
gpu_devices = K.tensorflow_backend._get_available_gpus()
gpu_devices

Using TensorFlow backend.


['/gpu:0']

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import keras
from keras.datasets import fashion_mnist
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout, BatchNormalization, MaxPooling2D

In [3]:
# Fetch the Fashion-MNIST train and test splits.
# The arrays returned by the loader are read-only (a known quirk of this dataset),
# so the raw images keep a `_0` suffix and are copied into new variables
# when they are reshaped and normalized further down.
train_split, test_split = fashion_mnist.load_data()
X_train_0, y_train = train_split
X_test_0, y_test = test_split

In [4]:
# Number of label categories: sets the width of the one-hot label vectors
# and the number of units in each model's softmax output layer.
NUM_CLASSES = 10

In [5]:
# Build model-ready float32 copies of the raw images, scaled from [0, 255] to [0, 1].
# Sample counts are taken from the arrays themselves rather than hard-coded
# (60000 / 10000), so the cell also works on a subset of the data.
# `astype` returns a new array, which also sidesteps the read-only raw arrays.

# Flattened version for the vanilla deep NN: one row per image (28 x 28 = 784 values).
flat_X_train = X_train_0.reshape(len(X_train_0), -1).astype('float32') / 255.
flat_X_test = X_test_0.reshape(len(X_test_0), -1).astype('float32') / 255.

# Image version for the CNN: keep the 2-D layout and append a single channel axis.
X_train = X_train_0.reshape(X_train_0.shape + (1,)).astype('float32') / 255.
X_test = X_test_0.reshape(X_test_0.shape + (1,)).astype('float32') / 255.

# Convert integer labels to one-hot vectors.
# The ndim guard keeps this cell idempotent: the labels are overwritten in place,
# so without it a second run would feed one-hot labels back into to_categorical
# and produce (N, NUM_CLASSES, NUM_CLASSES) targets.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)

In [6]:
# Vanilla deep NN on the flattened images:
# two hidden blocks (Dense/ReLU -> Dropout -> BatchNorm) followed by a softmax classifier.
deep_nn = Sequential([
    Dense(256, activation='relu', input_shape=(784,)),
    Dropout(0.5),
    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(NUM_CLASSES, activation='softmax'),
])

# Labels are one-hot encoded, hence categorical cross-entropy.
deep_nn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
deep_nn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 256)               200960    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1024      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 128)               512       
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
Total para

In [7]:
# TensorBoard logging for the deep NN run; event files go to ./logs.
deep_nn_tb_options = dict(
    log_dir='./logs',
    histogram_freq=1,   # write histograms every epoch
    batch_size=32,
    write_graph=True,
    write_grads=True,
    write_images=True,
    embeddings_freq=0,  # embedding logging disabled
    embeddings_layer_names=None,
    embeddings_metadata=None,
)
tb_callback = keras.callbacks.TensorBoard(**deep_nn_tb_options)

In [8]:
# Train the deep NN on the flattened images for 10 epochs, logging to TensorBoard.
# NOTE(review): the test split doubles as validation data here, so the reported
# validation metrics are not an independent held-out estimate.
deep_nn.fit(
    flat_X_train,
    y_train,
    epochs=10,
    validation_data=(flat_X_test, y_test),
    callbacks=[tb_callback],
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x25fddfe44a8>

In [16]:
# Reset the Keras backend session before building the next model; the CNN summary
# below shows layer names starting again at `conv2d_1` / `dense_1`.
# NOTE(review): execution counts jump from In [8] to In [16] here, so cells were
# run or deleted in between -- confirm the notebook survives Restart & Run All.
keras.backend.clear_session()

In [17]:
# CNN on the 28 x 28 x 1 images: two Conv/ReLU + MaxPool stages, then a dense head
# (Dense/ReLU -> Dropout -> BatchNorm) and a softmax classifier.
cnn_model = Sequential([
    # 'same' padding keeps the spatial size; each 2 x 2 pool halves it.
    Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(NUM_CLASSES, activation='softmax'),
])
cnn_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 64)        640       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               401536    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
__________

In [18]:
# Same training setup as the deep NN: Adam optimizer, categorical cross-entropy
# on the one-hot labels, accuracy as the tracked metric.
cnn_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [19]:
# TensorBoard logging for the CNN run; event files go to ./cnn_logs so they stay
# separate from the deep NN logs.
cnn_tb_options = dict(
    log_dir='./cnn_logs',
    histogram_freq=1,   # write histograms every epoch
    batch_size=32,
    write_graph=True,
    write_grads=True,
    write_images=True,
    embeddings_freq=0,  # embedding logging disabled
    embeddings_layer_names=None,
    embeddings_metadata=None,
)
cnn_tb_callback = keras.callbacks.TensorBoard(**cnn_tb_options)

In [20]:
# Train the CNN for 15 epochs with mini-batches of 16, logging to TensorBoard.
# NOTE(review): the test split doubles as validation data here, so the reported
# validation metrics are not an independent held-out estimate.
cnn_model.fit(
    X_train,
    y_train,
    epochs=15,
    batch_size=16,
    validation_data=(X_test, y_test),
    callbacks=[cnn_tb_callback],
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x261a7689978>

Judging by the drop in accuracy at epoch 15, the model is either thrashing a bit or beginning to overfit. If this were a model I was planning to ship to production, I'd attack this problem during hyperparameter tuning, most likely by tweaking the dropout rate and by checking whether adding learning-rate decay improves performance.