# Import

In [7]:
import numpy as np
import pandas as pd
from numpy import reshape
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

# Mnist dataset

In [8]:
data_train = pd.read_csv("train.csv")
data_test = pd.read_csv("test.csv")

In [9]:
from matplotlib import pylab, gridspec
from matplotlib import pyplot as plt

# X_train and y_train

In [10]:
X_train = data_train.drop(["label"], axis = 1)
y_train = pd.get_dummies(data_train["label"])

In [11]:
X_train = X_train.values.reshape(X_train.shape[0], 28, 28, 1)


# Keras CNN model

In [12]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau

Using TensorFlow backend.


In [13]:
model = Sequential()

model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=(28,28,1)))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2))) # 2 на 2 матрица
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))  

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

In [14]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 64)        18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 8, 8, 64)          36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 64)          0         
__________

# Model.fit

In [15]:
lr_reduce = ReduceLROnPlateau(monitor='val_acc', factor=0.1, epsilon=0.0001, patience=1, verbose=1)
model.fit(X_train, y_train, batch_size=128, epochs=19, verbose=1, validation_split=0.3, callbacks=[lr_reduce])

Train on 29399 samples, validate on 12601 samples
Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.1.
Epoch 8/19
Epoch 9/19
Epoch 10/19

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.010000000149011612.
Epoch 11/19

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 12/19

Epoch 00012: ReduceLROnPlateau reducing learning rate to 9.999999310821295e-05.
Epoch 13/19

Epoch 00013: ReduceLROnPlateau reducing learning rate to 9.999999019782991e-06.
Epoch 14/19

Epoch 00014: ReduceLROnPlateau reducing learning rate to 9.99999883788405e-07.
Epoch 15/19

Epoch 00015: ReduceLROnPlateau reducing learning rate to 9.99999883788405e-08.
Epoch 16/19

Epoch 00016: ReduceLROnPlateau reducing learning rate to 9.999998695775504e-09.
Epoch 17/19

Epoch 00017: ReduceLROnPlateau reducing learning rate to 9.99999905104687e-10.
Epoch 18/19

Epoch 00018: ReduceLROnPlateau redu

<keras.callbacks.History at 0x7f4e2344cbe0>

In [16]:
from random import random

In [17]:
X_test = data_test.values.reshape(data_test.shape[0], 28, 28, 1)

# Wrong pred and True predict percent

In [18]:
pred2 = model.predict(X_train).argmax(1)
data_wrong_pred = data_train[data_train["label"] != pred2]

In [79]:
truepredpercent = 100 - len(data_wrong_pred) / len(data_test) * 100
print('True predict percent: ' + str(truepredpercent) + ' %')

True predict percent: 98.85357142857143 %


# Custom mnist dataset

In [25]:
from PIL import Image
from os import listdir
from sklearn.metrics import accuracy_score
import numpy as np
def readImages():
    answers = list()
    true = list()
    for i in range(10):
        l = listdir("./hr/{}/".format(i))
        true.extend([int(x[0]) for x in l])
        for image in l:
            img = np.array([255 - x for x in np.asarray(Image.open("./hr/{}/{}".format(i, image)).convert("L")).reshape(784,)]).reshape(28, 28)
            answers.append(np.argmax(model.predict(np.array([img]).reshape(1, 28, 28, 1))[0]))
    return accuracy_score(true, answers)
k =readImages()
print('Accuracy: '+ str(k))

Accuracy: 0.68


In [26]:
def readImages():
    answers = list()
    true = list()
    l = listdir("./hr/12345/")
    true.extend([int(x[0]) for x in l])
    for image in l:
        img = np.array([255 - x for x in np.asarray(Image.open("./hr/12345/{}".format(image)).convert("L")).reshape(784,)]).reshape(28, 28)
        answers.append(np.argmax(model.predict(np.array([img]).reshape(1, 28, 28, 1))[0]))
    return accuracy_score(true, answers)
f = readImages()
print('Accuracy: '+ str(f))

Accuracy: 0.79
