In [28]:
import keras
from keras.layers import Dense  
from keras.models import Sequential  
import pickle
import gzip 
import numpy as np
from keras.layers.convolutional import Conv2D
from keras.layers import Dense
from keras.layers.convolutional import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dropout
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score
from PIL import Image
import os

In [2]:
num_classes = 10  
image_vector_size = 28*28  
image_size = 784 
input_shape = (1, 28, 28)

In [3]:
filename = 'mnist.pkl.gz'
f = gzip.open(filename, 'rb')
training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
f.close()

In [29]:
USPSMat  = []
USPSTar  = []
curPath  = 'USPSdata/USPSdata/Numerals'
savedImg = []

for j in range(0,10):
    curFolderPath = curPath + '/' + str(j)
    imgs =  os.listdir(curFolderPath)
    for img in imgs:
        curImg = curFolderPath + '/' + img
        if curImg[-3:] == 'png':
            img = Image.open(curImg,'r')
            img = img.resize((28, 28))
            savedImg = img
            imgdata = (255-np.array(img.getdata()))/255
            USPSMat.append(imgdata)
            USPSTar.append(j)

In [32]:
X_train = training_data[0]
Y_train = training_data[1]
X_test = test_data[0]
Y_test = test_data[1]
X_val = validation_data[0]
Y_val = validation_data[1]
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_val = X_val.reshape(X_val.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
USPSMat = USPSMat.reshape(USPSMat.shape[0], 28, 28, 1).astype('float32') 
Y_train = keras.utils.to_categorical(Y_train, num_classes)
Y_val = keras.utils.to_categorical(Y_val, num_classes)
Y_test = keras.utils.to_categorical(Y_test, num_classes) 

In [7]:

# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255

In [8]:
#fix random seed for reproducibility
seed = 7
np.random.seed(seed)

In [9]:
num_classes = Y_test.shape[1]

In [12]:
def baseline_model():
    # create model
    model = Sequential()
    model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [13]:
# build the model
model = baseline_model()
# Fit the model
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=20, batch_size=200, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test, Y_test, verbose=0)
print("CNN Error: %.2f%%" % (100-scores[1]*100))

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Train on 50000 samples, validate on 10000 samples
Epoch 1/20
 - 27s - loss: 1.3253 - acc: 0.6417 - val_loss: 0.5344 - val_acc: 0.8510
Epoch 2/20
 - 26s - loss: 0.4603 - acc: 0.8663 - val_loss: 0.3691 - val_acc: 0.8948
Epoch 3/20
 - 26s - loss: 0.3702 - acc: 0.8906 - val_loss: 0.3180 - val_acc: 0.9093
Epoch 4/20
 - 27s - loss: 0.3281 - acc: 0.9021 - val_loss: 0.2881 - val_acc: 0.9152
Epoch 5/20
 - 26s - loss: 0.2917 - acc: 0.9126 - val_loss: 0.2498 - val_acc: 0.9248
Epoch 6/20
 - 27s - loss: 0.2619 - acc: 0.9221 - val_loss: 0.2271 - val_acc: 0.9326
Epoch 7/20
 - 26s - loss: 0.2315 - acc: 0.9314 - val_loss: 0.1928 - val_acc: 0.9429
Epoch 8/20
 - 28s - loss: 0.2041 - acc: 0.9386 - val_loss: 0.1706 - val_acc: 0.9484
Epoch 9/20
 - 27s - loss: 0.1812 - acc: 0.9456 - val_loss: 

In [19]:
PredictNN = model.predict(X_train)
PredNN = []
for i in range(0,50000):
    PredNN.append(np.argmax(PredictNN[i]))
    
accuracy_score(training_data[1],PredNN) #Training Accuracy


0.98172

In [20]:
confusion_matrix(training_data[1],PredNN) #Confusion Matrix for training

array([[4893,    3,    1,    0,    3,    4,   13,    1,    8,    6],
       [   1, 5632,   19,    2,    6,    0,    0,    6,    9,    3],
       [   9,   21, 4851,   15,   10,    0,    2,   27,   25,    8],
       [   8,   11,   27, 4940,    0,   22,    1,   23,   46,   23],
       [   1,   10,    3,    0, 4805,    0,   12,    2,    5,   21],
       [   8,   11,    2,   10,    2, 4402,   21,    4,   26,   20],
       [  13,    5,    1,    0,    9,    5, 4902,    1,   15,    0],
       [   3,   15,   19,    2,   15,    2,    0, 5074,    6,   39],
       [  10,   35,    7,    8,   17,   10,   14,    6, 4717,   18],
       [   6,   10,    0,   11,   43,    8,    1,   26,   13, 4870]])

In [24]:
PredictNN = model.predict(X_val)
PredNN = []
for i in range(0,10000):
    PredNN.append(np.argmax(PredictNN[i]))
    
accuracy_score(validation_data[1],PredNN) #Training Accuracy

0.9578

In [25]:
confusion_matrix(validation_data[1],PredNN) #Confusion Matrix for training

array([[ 984,    0,    0,    0,    1,    0,    1,    3,    0,    2],
       [   0, 1057,    1,    3,    0,    0,    0,    2,    1,    0],
       [   3,   12,  949,    7,    4,    0,    1,   14,    0,    0],
       [   2,    1,    2, 1011,    1,    6,    0,    2,    2,    3],
       [   0,    7,    0,    0,  970,    0,    0,    1,    0,    5],
       [   6,    1,    2,   13,    2,  874,    9,    0,    2,    6],
       [   5,    6,    0,    0,    8,    3,  944,    0,    1,    0],
       [   1,    2,    1,    1,    3,    0,    0, 1079,    0,    3],
       [   7,   51,    9,   32,    5,   11,    6,   19,  862,    7],
       [   3,   11,    0,    7,   40,    3,    0,   49,    0,  848]])

In [26]:
PredictNN = model.predict(X_test)
PredNN = []
for i in range(0,10000):
    PredNN.append(np.argmax(PredictNN[i]))
    
accuracy_score(test_data[1],PredNN) #Training Accuracy

0.9565

In [27]:
confusion_matrix(test_data[1],PredNN) #Confusion Matrix for training

array([[ 975,    0,    0,    1,    2,    0,    0,    1,    0,    1],
       [   0, 1131,    2,    0,    0,    0,    1,    0,    1,    0],
       [   7,    6,  990,   11,    4,    0,    0,   12,    1,    1],
       [   1,    0,    2,  989,    0,    5,    0,   10,    3,    0],
       [   0,    0,    0,    0,  977,    0,    0,    4,    0,    1],
       [   3,    1,    0,   10,    1,  868,    5,    1,    1,    2],
       [  11,    4,    1,    1,   12,    8,  921,    0,    0,    0],
       [   1,    3,    7,    0,    1,    0,    0, 1012,    1,    3],
       [  15,   31,   10,   33,   11,    8,   10,   23,  827,    6],
       [   5,   11,    1,    7,   41,    3,    0,   66,    0,  875]])

In [36]:
PredictNN = model.predict(USPSMat)
PredNN = []
for i in range(0,19999):
    PredNN.append(np.argmax(PredictNN[i]))
    
accuracy_score(USPSTar,PredNN) #Training Accuracy

0.47297364868243413

In [37]:
confusion_matrix(USPSTar,PredNN) #Confusion Matrix for training

array([[ 440,    4,   14,   59,  722,   26,   28,  120,   90,  497],
       [  70,  351,  217,   22,  713,   18,   12,  480,  104,   13],
       [  70,   19, 1476,  104,  144,   47,   35,   46,   48,   10],
       [  23,   11,  119, 1471,   23,  230,    0,   27,   90,    6],
       [   3,    4,   20,    4, 1417,   13,    3,  237,  265,   34],
       [  71,    9,   25,  144,   57, 1512,   18,   23,  109,   32],
       [ 316,   42,  270,   38,  178,  130,  855,    5,   70,   96],
       [  48,   41,   79,  442,   81,   25,    5, 1060,  191,   28],
       [ 185,    7,   74,  645,  145,  248,   29,  143,  454,   70],
       [   9,    4,   33,  278,  173,   22,    0,  708,  350,  423]])

In [35]:
PredictNN.shape

(19999, 10)