In [119]:
# -*- coding: utf-8 -*-
"""
Created on Wed Nov  4 17:10:20 2020

@author: Tagore pothuneedi 
@Used code: PJ
"""

import matplotlib.pyplot as plt #to plot required plots
from skimage.transform import resize #to resize image 
from skimage.util import pad #to pad values in numpy array
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import os #to get paths of images
import cv2 #to read images
from sklearn.decomposition import PCA
# Maps an activation name to the test accuracy (in percent) of the CNN trained with it.
scores = dict()


#function used to show the image
def show_image(image_path,title='Image',cmap_type='gray'):
    """Display the image stored at ``image_path`` in a matplotlib figure.

    The file is read with OpenCV as a single-channel grayscale image and drawn
    with the colour map ``cmap_type``; ``title`` is used as the figure title
    and the axes are hidden.
    """
    grey_pixels = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    plt.imshow(grey_pixels, cmap=cmap_type)
    # Axis visibility and title are independent of each other.
    plt.axis('off')
    plt.title(title)
    plt.show()

# function to get all file paths from provided directory
def get_list_of_files(dir_path):
    """Recursively collect the paths of all files below ``dir_path``.

    Entries are visited in the order returned by ``os.listdir``; when an entry
    is a directory, the files found inside it are inserted at that position.

    Returns a list of paths, each built with ``os.path.join`` from ``dir_path``.
    """
    collected = []
    for entry_name in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry_name)
        if not os.path.isdir(entry_path):
            # plain file: keep its full path
            collected.append(entry_path)
            continue
        # sub-directory: descend and splice its files in
        collected.extend(get_list_of_files(entry_path))
    return collected

#function to get folder names from provided path : folder name is later used as target label
def get_folder_name_list(dir_path):
    """Return the names (not full paths) of the direct sub-directories of ``dir_path``.

    Plain files are skipped and nested directories are not descended into.
    The names come back in ``os.listdir`` order.
    """
    return [
        name
        for name in os.listdir(dir_path)
        if os.path.isdir(os.path.join(dir_path, name))
    ]

# Padding callback: writing zeros produces a black border (a "pad with ones" variant would give a white one).
# Used in image preprocessing to add a 2-pixel border on all sides.
def padwithzeros(vector, pad_width, iaxis, kwargs):
    """Zero the padded head and tail of ``vector`` in place.

    The signature follows the callable form accepted by ``numpy.pad``.

    Parameters
    ----------
    vector : array-like, 1-D
        Already padded vector; it is modified in place.
    pad_width : pair of int
        Number of padded elements at the start (``pad_width[0]``) and at the
        end (``pad_width[1]``) of ``vector``.
    iaxis : int
        Axis currently being processed (unused).
    kwargs : dict
        Extra keyword arguments (unused).

    Returns
    -------
    The same ``vector`` object.
    """
    before, after = pad_width[0], pad_width[1]
    vector[:before] = 0
    # ``vector[-0:]`` is the whole vector, so a trailing pad of 0 must be
    # skipped instead of sliced, otherwise every element would be zeroed.
    if after > 0:
        vector[-after:] = 0
    return vector

def pre_process_image(img_path):
    """Load an image and convert it to a flat binary (0/1) feature vector.

    The file is read as grayscale, thresholded at 128, inverted, and every
    non-zero pixel of the inverted image is mapped to 1. With OpenCV's
    ``THRESH_BINARY`` this means pixels with grey value <= 128 become 1 and
    brighter pixels become 0.

    Parameters
    ----------
    img_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        1-D array with the binarised pixels (``ndarray.flatten`` order).

    Raises
    ------
    ValueError
        If OpenCV cannot read an image from ``img_path``.
    """
    img_grey = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img_grey is None:
        # cv2.imread signals a missing or undecodable file by returning None;
        # fail here with the offending path instead of deep inside cv2.threshold.
        raise ValueError("Could not read an image from {0!r}".format(img_path))

    # 128 is the middle between black and white on the grey scale
    thresh = 128
    img_binary = cv2.threshold(img_grey, thresh, 255, cv2.THRESH_BINARY)[1]

    # invert the binary image, then map 255 -> 1 so the vector holds only 0/1
    inverted_binary_image = cv2.bitwise_not(img_binary)
    fimg = np.where(inverted_binary_image > 0, 1, inverted_binary_image)

    # Disabled alternative for images that are not 32x32 pixels:
    #   resized_img = resize(inverted_binary_image,(32,32),anti_aliasing=False)
    #   resized_img_with_padding = pad(resized_img,2,padwithzeros)
    #   fimg = np.where(resized_img_with_padding>0,1,resized_img_with_padding)
    #
    # Also left disabled: L2 normalisation (fimg / np.linalg.norm(fimg)) and
    # Sobel / Laplacian filtering of fimg.

    return fimg.flatten()

#get list of preprocessed images
def load_images_list(img_paths):
    """Run ``pre_process_image`` on every path and return the results as a list.

    The output list has one entry per element of ``img_paths``, in the same order.
    """
    return [pre_process_image(single_path) for single_path in img_paths]

# base path for dataset
# Root folder of the training images; each class lives in its own sub-folder.
base_path=r"C:\Users\tagor\Desktop\ml\DevanagariHandwrittenCharacterDataset\Train"

# The class folders could also be discovered automatically:
#base_path_folder_list = get_folder_name_list(base_path)
#print(base_path_folder_list)

#base_path_folder_list=['character_1_ka','character_2_kha','character_3_ga','character_4_gha','character_5_kna']
base_path_folder_list=['digit_0', 'digit_1', 'digit_2', 'digit_3', 'digit_4', 'digit_5', 'digit_6', 'digit_7', 'digit_8', 'digit_9']

# Build one frame per class folder and concatenate once at the end.
# Growing a DataFrame with DataFrame.append inside the loop copies all rows on
# every iteration, and the method no longer exists in pandas >= 2.0.
class_frames = []
for i, folder_name in enumerate(base_path_folder_list):
    img_path = os.path.join(base_path, folder_name)
    images_path_list = get_list_of_files(img_path)
    images_flattened = load_images_list(images_path_list)
    df_images = pd.DataFrame(images_flattened)
    # the position of the folder in base_path_folder_list is the numeric target label
    df_images['label'] = i
    # TODO (note carried over from the original): subdivide / abstract
    class_frames.append(df_images)

df = pd.concat(class_frames, ignore_index=True)
    

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Shuffle the records. The fixed seed keeps the row order, and therefore the
# train/test split below, identical across kernel restarts.
df = df.sample(frac=1, random_state=1111)
# every column except the target is a feature
X = df.drop(columns='label')
y = df['label']

# 80/20 split, stratified so both parts keep the class proportions of y
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=1111, stratify=y)


##################################################################################
#target_names=['character_1_ka','character_2_kha','character_3_ga']
# class names, in label order (label i corresponds to base_path_folder_list[i])
target_names = base_path_folder_list.copy()


# Path of one arbitrary image from the Test folder, for a quick manual check.
test_base_path = r'C:\Users\tagor\Desktop\ml\DevanagariHandwrittenCharacterDataset\Test'
# os.path.join supplies the separator between "Test" and "digit_7" that plain
# string concatenation left out (it produced "...\Testdigit_7/5771.png").
test_img_path = os.path.join(test_base_path, 'digit_7', '5771.png')


def test_char(img_path):
    test_img = pre_process_image(img_path)
    test_img=test_img.reshape(1,-1)
    print("rfc: {0}".format(rfc.predict(test_img)))

    

In [120]:

##################
#CNN 
##################
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import numpy as np
from keras.layers.core import Activation
from keras.models import Sequential
import keras.backend as K


In [121]:
# Shape of the training features next to the shape of the full feature matrix.
(X_train.shape, X.shape)

((13600, 1024), (17000, 1024))

In [122]:
# Training hyper-parameters shared by all models below.
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 32,32

# Turn each flat pixel row into a (rows, cols, 1) single-channel image.
# -1 lets NumPy infer the number of samples instead of hard-coding 13600 / 3400,
# so the cell keeps working when the dataset or the split ratio changes.
x_train = np.asarray(X_train).reshape(-1, img_rows, img_cols, 1)
x_test = np.asarray(X_test).reshape(-1, img_rows, img_cols, 1)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices (one-hot).
# The ndim guard makes the cell safe to re-run: y_train / y_test are
# overwritten here, and encoding the already one-hot arrays a second time
# would add another axis.
if np.ndim(y_train) == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if np.ndim(y_test) == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (13600, 32, 32, 1)
13600 train samples
3400 test samples


In [None]:
from keras.backend import sigmoid,tanh,exp
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation

def swish(x, beta = 2):
    """Swish activation: ``x * sigmoid(beta * x)``.

    ``beta`` scales the input of the sigmoid gate and defaults to 2.
    """
    gate = sigmoid(beta * x)
    return x * gate
def param_tanh(x):
    """Activation that multiplies the input by its own hyperbolic tangent: ``x * tanh(x)``."""
    squashed = tanh(x)
    return x * squashed
def relu_moid(x):
    """Piecewise activation: identity where ``x > 0``, ``sigmoid(x)`` elsewhere.

    The selection is done element-wise with ``K.switch`` so it works on tensors
    (a plain Python ``if`` on ``x`` would not).
    """
    is_positive = x > 0
    squashed = sigmoid(x)
    return K.switch(is_positive, x, squashed)

def param_sigmoid(x):
    """Activation ``x * sigmoid(x)``, algebraically equal to ``x / (1 + exp(-x))``.

    The sigmoid form is used instead of the explicit ``exp(-x)`` because
    ``exp(-x)`` overflows for large negative ``x``; the forward value would
    still be finite, but differentiating through the overflowed term can
    produce NaN gradients.
    """
    return x * sigmoid(x)
    
    

# Register the custom activations so layers can refer to them by name,
# e.g. Conv2D(..., activation='relu_moid'). Each function is wrapped in an
# Activation layer, in the same order as before.
get_custom_objects().update({
    'swish': Activation(swish),
    'param_tanh': Activation(param_tanh),
    'relu_moid': Activation(relu_moid),
    'param_sigmoid': Activation(param_sigmoid),
})

# Relu_moid

In [124]:
# CNN whose two convolution layers and hidden dense layer use the custom
# 'relu_moid' activation; the output layer is a 10-way softmax.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu_moid', input_shape=(32,32,1)),
    Conv2D(64, (3, 3), activation='relu_moid'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu_moid'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])


In [125]:
#optimizer=keras.optimizers.Adadelta(),
# Shapes of the test and training tensors and their label arrays.
(x_test.shape, y_test.shape, x_train.shape, y_train.shape)

((3400, 32, 32, 1), (3400, 10), (13600, 32, 32, 1), (13600, 10))

In [126]:
# Train the relu_moid model, then record its accuracy on the test split.
# The same (x_test, y_test) pair serves as validation data during fit and
# as the evaluation set afterwards.
held_out = (x_test, y_test)
model.compile(optimizer='Adam',
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          validation_data=held_out,
          epochs=epochs,
          batch_size=batch_size,
          verbose=1)
score = model.evaluate(*held_out, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)
# stored as a percentage
scores['Relu_moid'] = 100 * score[1]

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.06695754826068878
Test accuracy: 0.981176495552063


# param_tanh

In [142]:
# Same architecture as the other experiments, with the custom 'param_tanh'
# activation in the convolution layers and the hidden dense layer.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='param_tanh', input_shape=(32,32,1)),
    Conv2D(64, (3, 3), activation='param_tanh'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='param_tanh'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])


In [143]:
# Train the param_tanh model, then record its accuracy on the test split.
held_out = (x_test, y_test)
model.compile(optimizer='Adam',
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          validation_data=held_out,
          epochs=epochs,
          batch_size=batch_size,
          verbose=1)
score = model.evaluate(*held_out, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)
# stored as a percentage
scores['param_tanh'] = 100 * score[1]

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.09002281725406647
Test accuracy: 0.9805882573127747


# param_sigmoid

In [129]:
# Same architecture as the other experiments, with the custom 'param_sigmoid'
# activation in the convolution layers and the hidden dense layer.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='param_sigmoid', input_shape=(32,32,1)),
    Conv2D(64, (3, 3), activation='param_sigmoid'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='param_sigmoid'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])


In [132]:
# Train the param_sigmoid model, then record its accuracy on the test split.
held_out = (x_test, y_test)
model.compile(optimizer='Adam',
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          validation_data=held_out,
          epochs=epochs,
          batch_size=batch_size,
          verbose=1)
score = model.evaluate(*held_out, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)
# stored as a percentage
scores['param_sigmoid'] = 100 * score[1]

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.07156664878129959
Test accuracy: 0.9882352948188782


# Relu

In [134]:
# Baseline: same architecture with the built-in 'relu' activation.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(32,32,1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])


In [135]:
# Train the relu model, then record its accuracy on the test split.
held_out = (x_test, y_test)
model.compile(optimizer='Adam',
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          validation_data=held_out,
          epochs=epochs,
          batch_size=batch_size,
          verbose=1)
score = model.evaluate(*held_out, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)
# stored as a percentage
scores['Relu'] = 100 * score[1]

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.04123209789395332
Test accuracy: 0.9891176223754883


# tanh

In [137]:
# Baseline: same architecture with the built-in 'tanh' activation.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='tanh', input_shape=(32,32,1)),
    Conv2D(64, (3, 3), activation='tanh'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='tanh'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [138]:
# Train the tanh model, then record its accuracy on the test split.
held_out = (x_test, y_test)
model.compile(optimizer='Adam',
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          validation_data=held_out,
          epochs=epochs,
          batch_size=batch_size,
          verbose=1)
score = model.evaluate(*held_out, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)
# stored as a percentage
scores['tanh'] = 100 * score[1]

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.06211266294121742
Test accuracy: 0.9832352995872498


# Sigmoid

In [145]:
# Baseline: same architecture with the built-in 'sigmoid' activation.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='sigmoid', input_shape=(32,32,1)),
    Conv2D(64, (3, 3), activation='sigmoid'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='sigmoid'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [146]:
# Train the sigmoid model, then record its accuracy on the test split.
# NOTE(review): the output recorded for this cell shows a test loss of about
# 2.30 and an accuracy of 0.10, i.e. chance level for 10 classes - this
# configuration did not learn within the 12 epochs.
held_out = (x_test, y_test)
model.compile(optimizer='Adam',
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          validation_data=held_out,
          epochs=epochs,
          batch_size=batch_size,
          verbose=1)
score = model.evaluate(*held_out, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)
# stored as a percentage
scores['sigmoid'] = 100 * score[1]

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 2.3036131858825684
Test accuracy: 0.10000000149011612


In [147]:
# Test accuracy (in percent) collected for each activation function by the training cells.
scores

{'Relu_moid': 98.1176495552063,
 'param_tanh': 98.05882573127747,
 'param_sigmoid': 98.82352948188782,
 'Relu': 98.91176223754883,
 'tanh': 98.32352995872498,
 'sigmoid': 10.000000149011612}