# Introduction



This project tackles the character recognition problem with two techniques and compares them; both rely on substantial preprocessing. The packages used are cv2, NumPy, and mahotas for preprocessing; scikit-learn for feature extraction and for the SVM (support vector machine); and Keras for the CNN (convolutional neural network). Two Jupyter notebooks accompany this submission, one per algorithm. To run them, replace the file paths so that they point to the chars74k-lite dataset (used for training) and to detection images 1 and 2 (used for testing).


In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import mahotas
import glob
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from skimage import feature, exposure
import imutils
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.utils import to_categorical
from keras import layers
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Placeholder paths -- replace them with the locations on your machine.
# Root folder of the chars74k-lite dataset; the loading cell globs its
# sub-folders for *.jpg files.
fp_chars = '/Desktop/chars74k-lite'
# The two images handed to detection() in the last cells of the notebook.
fp_detect1 = '/Desktop/detection-images/detection-1.jpg'
fp_detect2 = '/Desktop/detection-images/detection-2.jpg'

# Feature Engineering

After locating the files with the glob package, we read each image with OpenCV and convert it from a 3-channel image to single-channel grayscale. As the first preprocessing and feature-extraction steps, we remove noise and blur the image before thresholding. We use Otsu thresholding here because it worked well in our experiments.

In [None]:

import os

# Load every training sample, preprocess it, and record its class label.
# `images` collects the binarised arrays, `labels` the matching label strings.
images = []
labels = []
# Sorting the glob results makes the sample order independent of the file
# system, so indexing into `images` and the later seeded splits are repeatable.
for class_dir in sorted(glob.glob(fp_chars + '/*')):
    # The label is taken from the folder name instead of a fixed character
    # position in the path, which silently broke whenever fp_chars changed.
    # Assumes one sub-folder per character, named after that character (the
    # chars74k-lite layout) -- TODO confirm against your copy of the dataset.
    label = os.path.basename(os.path.normpath(class_dir))
    for path in sorted(glob.glob(class_dir + '/*.jpg')):
        # read the file in cv2 (imread returns None instead of raising)
        image = cv2.imread(path)
        if image is None:
            print('Skipping unreadable image: {}'.format(path))
            continue
        # convert from 3 channel to 1 channel
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # remove noise
        image = cv2.fastNlMeansDenoising(image)
        # blur the image with a 5x5 window so thresholding is less speckled
        image = cv2.GaussianBlur(image, (5, 5), 0)
        # Otsu picks the threshold T from this image's own histogram
        T = mahotas.thresholding.otsu(image)
        # binarise: pixels above T become 255, everything else 0 ...
        img = image.copy()
        img[img > T] = 255
        img[img < 255] = 0
        # ... then invert, so pixels at or below T end up as 255
        img = cv2.bitwise_not(img)
        # store the sample together with its label
        images.append(img)
        labels.append(label)
        

In [None]:
# Show the array shape of `img`, the last image processed by the loading loop.
img.shape


In [None]:
# Sanity check: render the preprocessed sample at index 6000 and, as the cell
# output, show the label stored at the same index.
# NOTE(review): the index is hard-coded and requires at least 6001 loaded samples.
plt.imshow(images[6000])
plt.show()
labels[6000]

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt

def plot_training_score(history):
    """Plot every series recorded in a training history, one figure per series.

    Args:
        history: object exposing a ``history`` dict that maps a metric name to
            its sequence of per-epoch values (the object returned by
            ``model.fit`` is used this way in this notebook).

    Returns:
        None. The key names are printed and each figure is shown with
        ``plt.show()``.
    """
    print('Availible variables to plot: {}'.format(history.history.keys()))
    for key, values in history.history.items():
        print(key)
        plt.plot(values)
        plt.title('model ' + key)
        plt.ylabel(key)
        # The original had this call split across two lines
        # (`plt.xla` / `bel('epoch')`), which raised at runtime.
        plt.xlabel('epoch')
        # Each figure holds exactly one series, so label it with its own name
        # rather than a fixed train/test pair.
        plt.legend([key], loc='upper left')
        plt.show()

In [None]:
# Stack the preprocessed samples into one array; np.asarray only yields a
# regular (N, rows, cols) array when all images share the same shape.
imarray = np.asarray(images)

# Sort the class names so the label <-> index mapping is identical on every
# run (the iteration order of a plain set of strings is not stable between
# interpreter sessions), and look indices up through a dict instead of
# calling list.index once per sample.
uniq_labels = sorted(set(labels))
label_to_index = {label: index for index, label in enumerate(uniq_labels)}
labels_int = [label_to_index[label] for label in labels]
num_classes = len(uniq_labels)

# Hold out a test split (train_test_split's default proportions).
x_train, x_test, y_train, y_test = train_test_split(
    imarray, labels_int, shuffle=True, random_state=42)

# Split the remaining data once more into training and validation sets.
x_train, x_validate, y_train, y_validate = train_test_split(
    x_train, y_train, shuffle=True, random_state=42)

shape_train = x_train.shape
shape_validate = x_validate.shape
shape_test = x_test.shape

# Despite the "_flatten" names (kept because later cells use them), the images
# are not flattened: a trailing channel axis of size 1 is appended and the
# pixel values are scaled from 0..255 to 0..1.
x_train_flatten = x_train.reshape(shape_train[0], shape_train[1], shape_train[2], 1)
x_train_flatten = x_train_flatten.astype('float32') / 255

x_val_flatten = x_validate.reshape(shape_validate[0], shape_validate[1], shape_validate[2], 1)
x_val_flatten = x_val_flatten.astype('float32') / 255

x_test_flatten = x_test.reshape(shape_test[0], shape_test[1], shape_test[2], 1)
x_test_flatten = x_test_flatten.astype('float32') / 255

# One-hot encode the integer labels. num_classes is passed explicitly so all
# three matrices have the same width even if a split happens to miss the
# highest class index.
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)
y_validate = to_categorical(y_validate, num_classes=num_classes)

# Create the model
def conv_model(img_width, img_height, channels, num_classes=26):
    """Build and compile the CNN used for character classification.

    The network is three 3x3 convolutions (128, 256, 512 filters, ReLU),
    flatten, dropout of 0.35, a 1024-unit ReLU layer and a softmax output.
    It is compiled with the adadelta optimizer, categorical cross-entropy
    loss and the accuracy metric, and its summary is printed.

    Args:
        img_width: size of the first spatial axis of the input.
        img_height: size of the second spatial axis of the input.
            NOTE(review): the two values are forwarded in this order as
            ``input_shape``; Keras' default layout is presumably
            (rows, cols, channels), so for non-square images pass the array's
            own axis sizes in order -- confirm before relying on the names.
        channels: number of input channels.
        num_classes: width of the softmax output layer. Defaults to 26, the
            value that was previously hard-coded.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()  # start from an empty layer stack

    model.add(layers.Conv2D(128, (3, 3), activation='relu',
                            input_shape=(img_width, img_height, channels)))
    model.add(layers.Conv2D(256, (3, 3), activation='relu'))
    model.add(layers.Conv2D(512, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.35))
    model.add(layers.Dense(1024, activation='relu'))
    # One output unit per class.
    model.add(layers.Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adadelta', loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

# Build the network for 20x20 inputs with a single channel.
model = conv_model(20, 20, 1)

# Fit on the training split for 10 epochs in batches of 16; the validation
# split is passed along so it is scored during training.
history = model.fit(
    x_train_flatten,
    y_train,
    epochs=10,
    batch_size=16,
    validation_data=(x_val_flatten, y_validate),
)

# Draw the curves recorded in the training history.
plot_training_score(history)

# Score the held-out test split; evaluate() is unpacked as (loss, accuracy).
evaluation = model.evaluate(x_test_flatten, y_test)
test_loss, test_acc = evaluation
score = test_acc

print('Test Accuracy :', score)


In [None]:
def detection(fp, clf='cnn', size=(20, 20), model=model):
    """Locate character candidates in an image, classify them and show the result.

    The image is read as grayscale, blurred and edge-detected; every external
    contour whose bounding box is at least 5 px wide and 10 px high is cropped,
    resized to ``size``, denoised, binarised, inverted and classified. Boxes
    and predicted characters are drawn on a copy that is shown with matplotlib.

    Args:
        fp: path of the image to analyse.
        clf: ``'cnn'`` (default) to classify with ``np.argmax`` over
            ``model.predict``, or ``'svm'`` to classify HOG features with
            ``model.predict``.
        size: ``(width, height)`` each crop is resized to (the order
            ``cv2.resize`` expects).
        model: fitted classifier matching ``clf``. Defaults to the module-level
            ``model`` that exists when this function is defined.

    Returns:
        None. The annotated image is displayed.

    Raises:
        ValueError: if ``clf`` is neither ``'svm'`` nor ``'cnn'``.
        FileNotFoundError: if the image at ``fp`` cannot be read.
    """
    if clf not in ('svm', 'cnn'):
        # Previously an unknown value left `char` undefined (or stale from the
        # previous contour) and failed later with an unrelated error.
        raise ValueError("clf must be 'svm' or 'cnn', got {!r}".format(clf))

    image = cv2.imread(fp)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('Could not read image: {}'.format(fp))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Draw on a separate 3-channel copy: the colour tuple below then renders
    # as intended, and annotations can no longer leak into later crops or
    # into the threshold, both of which read from `image`.
    annotated = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

    # blur the image, find edges, and then find contours along the edged regions
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    edged = cv2.Canny(blurred, 30, 150)
    # findContours returns (image, contours, hierarchy) on OpenCV 3 but
    # (contours, hierarchy) on OpenCV 2 and 4; the contour list is always the
    # second-to-last item.
    cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # sort the bounding boxes by x so characters are visited left to right
    boxes = sorted((cv2.boundingRect(c) for c in cnts), key=lambda box: box[0])

    # Otsu threshold of the whole (unannotated) image, computed once.
    # NOTE(review): the training cell thresholds each sample with its own Otsu
    # value; a per-crop threshold may be what was intended here -- confirm.
    T = mahotas.thresholding.otsu(image)

    for (x, y, w, h) in boxes:
        # boxes narrower than 5 px or shorter than 10 px are treated as noise
        if w < 5 or h < 10:
            continue

        # crop the candidate and bring it to the classifier's input size
        roi = cv2.resize(image[y:y + h, x:x + w], size)
        roi = cv2.fastNlMeansDenoising(roi)

        # binarise and invert, mirroring the training preprocessing
        thresh = roi.copy()
        thresh[thresh > T] = 255
        thresh[thresh < 255] = 0
        thresh = cv2.bitwise_not(thresh)

        if clf == 'svm':
            # NOTE(review): `hog` is not defined in the cells visible here;
            # it is presumably provided by the SVM notebook.
            hist = hog(thresh)
            char = uniq_labels[model.predict([hist])[0]]
        else:
            # Scale to 0..1 as float32 like the training data, then add batch
            # and channel axes. Reshaping from thresh.shape keeps rows/cols
            # correct even when `size` is not square.
            batch = (thresh.astype('float32') / 255).reshape((1,) + thresh.shape + (1,))
            # predict_classes() is gone from recent Keras releases; argmax over
            # the softmax output is the equivalent.
            class_index = int(np.argmax(model.predict(batch), axis=-1)[0])
            char = uniq_labels[class_index]

        # draw a rectangle around the char, then show what the character is
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 1)
        cv2.putText(annotated, str(char), (x - 10, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)

    plt.figure(figsize=(18, 24))
    plt.imshow(annotated)
    plt.show()
        

In [None]:
# Run the CNN detector on the first test image. The trained network itself
# must be passed as `model`; `history` is only the record returned by fit().
detection(fp_detect1, clf='cnn', model=model)

In [None]:
# Run the CNN detector on the second test image; the classifier kind and the
# trained network are passed explicitly, matching the call for the first image.
detection(fp_detect2, clf='cnn', model=model)