In [9]:
# ----------------------------------------------------------------
# PROGRAM FOR HANDWRITTEN DIGITS RECOGNITION USING CUSTOM DATASET
# Author - R K Sharma
# Github - https://github.com/rksharma007/
# ----------------------------------------------------------------

In [10]:
#imports

import os
import PIL
import cv2
import pathlib
import numpy as np
import tensorflow as tf
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
from keras.utils import np_utils
from sklearn.utils import shuffle
from keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from sklearn.model_selection import train_test_split
from keras.layers.core import Activation, Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

In [None]:
#defining path for dataset

PATH = os.getcwd()                              #current working directory
data_path = PATH + '/preprocessed_digits/'      #root folder of the preprocessed images (trailing slash is relied on by later cells)

# os.listdir() returns entries in arbitrary order, so the list is sorted to keep
# the sample order (and therefore the seeded shuffle/split) reproducible.
# Only non-hidden sub-directories are kept: each remaining entry is treated as
# one class folder by the cells below, and a stray file or hidden folder
# (e.g. .ipynb_checkpoints) would make them fail.
data_dir_list = sorted(
    entry for entry in os.listdir(data_path)
    if not entry.startswith('.') and os.path.isdir(os.path.join(data_path, entry))
)
print(data_dir_list)                            #displaying the class folders that were found

In [18]:
#image geometry, epoch count and number of classes shared by the cells below

img_rows, img_cols = 28, 28    #target height and width (in pixels) of every image
num_epoch = 20                 #number of training epochs
num_classes = 10               #number of classes

In [None]:
img_data_list=[]                                                      #collects one resized grayscale array per image

#some preprocessing of images
for dataset in data_dir_list:
    class_dir = os.path.join(data_path, dataset)                      #folder holding the images of one class
    img_list = os.listdir(class_dir)
    print ('Loaded the images of dataset-'+'{}'.format(dataset))
    for img in img_list:
        img_file = os.path.join(class_dir, img)
        input_img = cv2.imread(img_file)                              #reading image from directory using OpenCV (BGR order)
        if input_img is None:
            # cv2.imread signals an unreadable/non-image file by returning None.
            # Fail loudly instead of skipping: the labelling cell derives labels
            # from the per-directory file counts, so a skipped file would shift
            # every following label.
            raise ValueError('Could not read image file: {}'.format(img_file))
        input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)       #converting image to grayscale
        # cv2.resize expects dsize as (width, height), i.e. (columns, rows)
        input_img_resize = cv2.resize(input_img, (img_cols, img_rows))
        img_data_list.append(input_img_resize)                        #appending processed image to img_data_list

if not img_data_list:
    raise ValueError('No images found under {}'.format(data_path))

img_data = np.array(img_data_list)                                    #stacking images into one array: (samples, rows, cols)
img_data = img_data.astype('float32')                                 #converting pixel values to float32
img_data /= 255                                                       #scaling pixel values from 0..255 to 0..1
print ('Image data shape - ',img_data.shape)                          #printing image data shape
img_data= np.expand_dims(img_data, axis=1)                            #adding a channel axis: (samples, 1, rows, cols)
print ('Expanded dimensions of image data - ',img_data.shape)         #printing shape of expanded image data
img_data = np.moveaxis(img_data, 1, -1)                               #moving the channel axis last: (samples, rows, cols, 1)
print ('Reshaped image data - ',img_data.shape)                       #printing reshaped image data

In [None]:
num_of_samples = img_data.shape[0]                    #number of samples
print(num_of_samples)                                 #printing number of samples

#creating labels
# Images were loaded folder by folder, so the samples of each class occupy one
# contiguous slice [j:k) whose length is the number of files in that folder.
labels = np.ones((num_of_samples,),dtype='int64')     #one integer label per sample (every entry is overwritten below)
j = 0
k = 0
for i in data_dir_list:
    k = k + len(os.listdir(data_path+i))
    labels[j:k] = int(i)                              #the folder name is the digit, converted explicitly to int
    j = k

# Without this check, samples not covered by the slices above would silently
# keep the initial value 1 and be trained as the digit "1".
if k != num_of_samples:
    raise ValueError('Label/sample mismatch: {} files counted but {} images loaded'.format(k, num_of_samples))

Y = np_utils.to_categorical(labels, num_classes)      #one hot encoding (one column per class, filled with zeroes and ones)
x,y = shuffle(img_data, Y, random_state=2)            #shuffling images and labels together with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)   #80/20 split into train and test sets

input_shape=img_data[0].shape      #input shape for single image
print(input_shape)                 #printing input shape
print(X_train.shape)               #printing shape of training data
print(y_train.shape)               #printing shape of training labels
print(X_test.shape)                #printing shape of testing data
print(y_test.shape)                #printing shape of testing labels

In [86]:
#creating model
# The layers are listed in the order the data flows through them and handed to
# Sequential, which stacks them linearly.
cnn_layers = [
    # First convolution: 32 filters with a 3x3 window, ReLU activation,
    # he_uniform weight initialisation; also declares the input shape.
    Conv2D(32,
           kernel_size=(3, 3),
           activation='relu',
           kernel_initializer='he_uniform',
           input_shape=input_shape),
    # Second convolution: 64 filters with a 3x3 window, ReLU activation.
    Conv2D(64, (3, 3), activation='relu'),
    # Max pooling over 2x2 windows reduces the size of the feature maps.
    MaxPooling2D(pool_size=(2, 2)),
    # Dropout randomly ignores units during training to limit overfitting.
    Dropout(0.5),
    # Flatten turns the feature maps into a 1-D vector for the dense layers.
    Flatten(),
    # Fully connected layer with 128 units, ReLU, he_uniform initialisation.
    Dense(128, activation='relu', kernel_initializer='he_uniform'),
    Dropout(0.25),
    # Output layer: one unit per class, softmax for multi-class classification.
    Dense(num_classes, activation='softmax'),
]
model = Sequential(cnn_layers)

In [None]:
#model compilation
# Stochastic gradient descent with momentum, categorical cross-entropy loss for
# the one-hot encoded labels, and accuracy reported as the metric.
sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
model.compile(
    optimizer=sgd_optimizer,
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

#printing summary of model
model.summary()

In [None]:
#defining path to save checkpoints
checkpoint_path = '../task/checkpoints/cp.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)
os.makedirs(checkpoint_dir, exist_ok=True)       #make sure the checkpoint folder exists before training starts

#creating checkpoint callback (save_weights_only=False stores the whole model, not just its weights)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=False, verbose=1)

#starting model training
# The epoch count comes from num_epoch (configuration cell) instead of a second
# hard-coded 20, so changing the setting there actually changes the training.
hist = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=num_epoch,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=[cp_callback],
)
print("The model has successfully trained")

#saving trained model
model.save('train_model.h5')
print("Saving the model as train_model.h5")

In [None]:
#evaluating the trained model on the held-out test split
# The model was compiled with metrics=['accuracy'], so evaluate() yields the
# loss first and the accuracy second.
score = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test Loss:', test_loss, '  Test accuracy:', test_accuracy)

In [90]:
def predict_digit(img, classifier=None):
    """Classify one handwritten-digit image, print and return the result.

    The image is resized to 28x28, converted to 8-bit grayscale, scaled to the
    0..1 range and shaped as a one-sample batch of shape (1, 28, 28, 1) before
    it is handed to the classifier's ``predict`` method.

    Parameters
    ----------
    img : PIL.Image.Image
        Image of the digit (anything offering PIL's ``resize``/``convert`` and
        convertible with ``np.array`` works).
    classifier : optional
        Object with a Keras-style ``predict(batch)`` method returning one score
        vector per sample. Defaults to the notebook-level ``model``.

    Returns
    -------
    tuple of (int, float)
        The index of the highest score (the predicted digit) and that score as
        a percentage rounded to 4 decimals.
    """
    # Fall back to the notebook-level model only when no classifier is given,
    # so the function can also be used with an explicitly loaded model.
    clf = model if classifier is None else classifier

    img = img.resize((28,28))           #resize image to 28x28 pixels
    img = img.convert('L')              #convert to 8-bit grayscale
    #img = PIL.ImageOps.invert(img)     #inverting image (for images with white background)
    batch = np.array(img)               #image to numpy array
    batch = batch.reshape(1,28,28,1)    #one sample, 28x28 pixels, one channel
    batch = batch/255.0                 #scaling pixel values to 0..1, as done for the training images

    res = clf.predict(batch)[0]                                   #score vector of the single sample
    digit = np.argmax(res)                                        #class with the highest score
    confidence = round(100*max(res), 4)                           #highest score as a percentage
    print('Predicted Digit     :', digit)                         #printing predicted digit
    # NOTE: the value printed as "Prediction Accuracy" is the classifier's score
    # for the predicted class, not an accuracy measured on a dataset.
    print('Prediction Accuracy :', confidence, '%')
    return int(digit), float(confidence)

In [None]:
#trying the classifier on an image loaded from disk

img_path1 = '#load your digit'                    #placeholder: replace with the path of the digit image to classify
test_image = Image.open(img_path1)                #opening the image with PIL
plt.imshow(test_image, cmap='gray')               #showing the image that is being classified
predict_digit(test_image);                        #prints the predicted digit; the trailing ';' keeps the cell from echoing a return value