## Loading required libraries

In [1]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential, save_model, load_model
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Convolution2D
from keras.layers import Flatten
from keras.layers import LSTM
from keras.utils import to_categorical
from keras.utils import np_utils
from keras.optimizers import SGD
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix

ModuleNotFoundError: No module named 'keras'

## Reading the data

In [27]:
train_data = pd.read_csv("data/emnist-balanced-train.csv", header = None)
test_data = pd.read_csv("data/emnist-balanced-test.csv", header = None)
train_data.head()
test_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,41,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,39,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,44,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Separating the response from the predictor variables in the training and testing data.

In [28]:
train_y = train_data[0]
train_y.head()
test_y = test_data[0]
test_y.head()

0    41
1    39
2     9
3    26
4    44
Name: 0, dtype: int64

In [35]:
train_X = train_data.iloc[:, 1:]
train_X.head()
test_X = test_data.iloc[:, 1:]
test_X.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,775,776,777,778,779,780,781,782,783,784
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Training and testing a simple neural network model

Creating a very simple neural network model with one hidden layer, training, and testing it on the data to see the performance and set up a baseline to improve upon with more complex neural nets later.

In [84]:
print(train_X.shape)

# # Partitioning the training data
# Xtr, Xts, Ytr, Yts = train_test_split(train_X, train_y, test_size=0.95, random_state=10)
# print(Xtr.shape, Xts.shape, Ytr.shape, Yts.shape)

# use full dataset
Xtr = train_X
Ytr = train_y
print(Xtr.shape, Xts.shape, Ytr.shape, Yts.shape)

(112800, 784)
(112800, 784) (107160, 784) (112800,) (107160,)


### Building the structure of the neural network

Defining the structure of the neural net: the size of the input layer, one hidden layer and the size of that layer, the method used for optimizing, the metric and the loss function.

In [87]:
# For a single-input model with 47 classes (categorical classification):
num_classes = 47 # number of classes present in the data
inp_dim = train_X.shape[1]
model_simple = Sequential()
model_simple.add(Dense(32, activation='relu', input_dim=inp_dim)) # First hidden layer
model_simple.add(Dense(num_classes, activation='softmax')) # output layer
model_simple.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Convert the response to one-hot encoding
one_hot_labels = to_categorical(Ytr, num_classes=num_classes)
one_hot_labels

array([[ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

### Training the model

Fit the model on the training data.

In [88]:
# Train the model, iterating on the data in batches of 32 samples
model_simple.fit(Xtr.values, one_hot_labels, epochs=150, batch_size=32, verbose = 0)

<keras.callbacks.History at 0x122b7cd4630>

### Testing the model performance on training data

Testing the performance of the model in the training data and looking at the training accuracy of the model using a confusion matrix.

In [101]:
# Accuracy on the training data
pred = model_simple.predict(Xtr.values)

# taking the maximum output as the predicted class label and then building the confusion matrix
cmat = confusion_matrix(pred.argmax(axis = 1), Ytr.values)
print(cmat)
print("accuracy on training data =", cmat.diagonal().sum()/cmat.sum())

[[   0    0    0 ...,    0    0    0]
 [   0    0    0 ...,    0    0    0]
 [   0    0    0 ...,    0    0    0]
 ..., 
 [   0    0    0 ...,    0    0    0]
 [   3   27    9 ...,   43 2169  468]
 [   0    0    0 ...,    0    0    0]]
accuracy on training data = 0.274875886525


The accuracy on the training data for the simple neural network model is 27.49%.

In [98]:
pred.argmax(axis = 1)
Ytr.values

array([45, 36, 43, ..., 23, 31,  8], dtype=int64)

### Saving and reloading the model

Saving the model to disk as an HDF5 file to be able to reload it later in the web application at the time of prediction.

In [100]:
model_simple.save('model_simple.h5')  # creates a HDF5 file '1H_32N_relu.h5'
del model  # deletes the existing model

# returns a compiled model identical to the previous one
model_simple2 = load_model('model_simple.h5')

## Adding more nodes in the hidden layer and including a dropout rate

Increasing the number of nodes in the hidden layer to make the model more complex and checking the performance of this new model. I have also included a drop out rate of 0.2 to avoid overfitting since I have increased the number of nodes in the hidden layer to 1024 from 32 previously. Dropout of 0.2 means that at the time of training, 20% of the nodes will be removed at random from the hidden layer for each pass of the gradient descent.

In [None]:
# For a single-input model with 47 classes (categorical classification):
num_classes = 47 # number of classes present in the data
inp_dim = train_X.shape[1]
model_dropout = Sequential()
model_dropout.add(Dense(1024, activation='relu', input_dim=inp_dim)) # First hidden layer
model_dropout.add(Dropout(0.2))
model_dropout.add(Dense(num_classes, activation='softmax')) # output layer
model_dropout.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Convert the response to one-hot encoding
one_hot_labels = to_categorical(Ytr, num_classes=num_classes)
one_hot_labels

In [None]:
model_dropout.fit(x_train, one_hot_labels, # Train the model using the training set...
          batch_size=512, nb_epoch=10,
          verbose=1, validation_split=0.1) # ...holding out 10% of the data for validation

In [None]:
print(model_dropout.evaluate(x_test, y_test, verbose=1)) # Evaluate the trained model on the test set!

## Convolutional Neural Network (CNN) Model

Implementing a convolutional neural network with the following structure:

1. Two convolutional layers with 32 channels each having size 3x3.
2. A max pooling layer with kernel size 2x2.
3. A fully connected layer.

Create a function to build the above described convolutional neural net. This function will return the model structure.

In [2]:
def buildConvNet(training_data, width=28, height=28, batch_size=256, epochs=10, verbose=False):
    ''' Build and train neural network. Also offloads the net in .yaml and the
        weights in .h5 to the bin/.
        Arguments:
            training_data: the packed tuple from load_data()
        Optional Arguments:
            width: specified width
            height: specified height
            batch_size: the size of the batch to perform gradient descent
            epochs: the number of epochs to train over
            verbose: enable verbose printing
    '''
    
    # Initialize data
    (x_train, y_train), (x_test, y_test), nb_classes = training_data
    input_shape = (height, width, 1)

    # Hyperparameters
    nb_filters = 32 # number of convolutional filters to use
    pool_size = (2, 2) # size of pooling area for max pooling
    kernel_size = (3, 3) # convolution kernel size

    model = Sequential()
    model.add(Convolution2D(nb_filters,
                            kernel_size,
                            padding='valid',
                            input_shape=input_shape,
                            activation='relu'))
    model.add(Convolution2D(nb_filters,
                            kernel_size,
                            activation='relu'))

    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(0.25))
    model.add(Flatten())

    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    if verbose == True: print(model.summary())
    return model

A function to train the model on the training data and save it to disk.

In [None]:
def train(model, training_data, width=28, height=28, callback=True, batch_size=256, epochs=10):
    (x_train, y_train), (x_test, y_test), nb_classes = training_data
    
    n_train = x_train.shape[0]
    x_train = x_train.reshape(n_train, width, height, )

    # convert class vectors to binary class matrices
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)

    if callback == True:
        # Callback for analysis in TensorBoard
        tbCallBack = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=[tbCallBack] if callback else None)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    
    model.save('CNN.h5')
    
#     saving the model to a yaml file instead of HDF5 file

#     model_yaml = model.to_yaml()
#     with open("bin/model.yaml", "w") as yaml_file:
#         yaml_file.write(model_yaml)
    
#     save_model(model, 'bin/model.h5')

## GPU support for Keras

Checking whether Keras backend is using CPU or GPU.

In [81]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/cpu:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 4762769127907959993
]
