In [1]:
import numpy as np
import cv2
import glob
import tensorflow as tf
import matplotlib.pyplot as pl
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Reshape, Flatten, Dense, Conv2D, Dropout
from tensorflow.keras.losses import sparse_categorical_crossentropy 
from tensorflow.keras.optimizers import Adam
from numpy import newaxis
from sklearn.utils import shuffle

In [2]:
#populating x_train and y_train with imgs and corresponding value from dataset

def populate(items):
    """Load ``items`` grayscale images per class folder and build a training set.

    Images are read from ``by_class/<code>/hsf_0/*.png`` relative to the
    working directory, where ``<code>`` (the folder name) is the decimal ASCII
    value of the character stored in that folder.

    Args:
        items: number of images to take from every class folder.

    Returns:
        A tuple ``(x_train, y_train)`` of numpy arrays. ``x_train`` stacks the
        images after resizing to 64x64 and dividing by 255; ``y_train`` holds
        ``ascii_code - 48`` for each image, so ``'0'`` maps to label 0.

    Raises:
        IndexError: if a class folder holds fewer than ``items`` PNG files.
        OSError: if an image file cannot be decoded by OpenCV.
    """
    root = "by_class"
    new_dim = (64, 64)
    x_arr = []
    y_arr = []

    # glob returns entries in arbitrary order; sorting makes the selected
    # files (and therefore the dataset) identical on every run.
    class_dirs = sorted(glob.glob(root + "/*"))

    # progress countdown: number of class folders still to be processed
    counter = len(class_dirs)

    for path in class_dirs:
        # list the folder once instead of re-globbing it for every image
        img_names = sorted(glob.glob(path + "/hsf_0/*.png"))
        if len(img_names) < items:
            raise IndexError(
                "%s holds %d images, %d requested" % (path, len(img_names), items)
            )

        for idx in range(items):
            img_name = img_names[idx]

            # flag 0 loads the file as a single-channel grayscale image
            img = cv2.imread(img_name, 0)
            if img is None:
                # cv2.imread reports failure by returning None, not by raising
                raise OSError("could not read image: " + img_name)

            img = cv2.resize(img, new_dim, interpolation=cv2.INTER_AREA)
            x_arr.append(img)

            # each folder name is the ascii value of the character
            # -48 since 0 is the lowest ascii value we evaluate and it starts at 48
            y_arr.append(int(path[len(root) + 1:]) - 48)

        counter -= 1
        print(counter)

    #converting to numpy array
    x_train = np.array(x_arr)
    y_train = np.array(y_arr)

    #normalizing data
    x_train = x_train / 255

    return (x_train, y_train)

In [3]:
# Seed TensorFlow so weight initialisation, dropout masks and batch order are
# repeatable when the notebook is re-run from a fresh kernel.
SEED = 0
tf.random.set_seed(SEED)

SAMPLES_PER_CLASS = 150

# Labels are (ascii code - 48), see populate(); 43 outputs cover labels 0..42,
# i.e. ascii 48..90 -- presumably '0'..'Z', verify against the dataset folders.
NUM_CLASSES = 43

#populating training data and shuffling
# populate() appends the samples class by class, so without shuffling the
# tail of the arrays (which validation_split in fit() holds out) would contain
# only the last classes.
x_train, y_train = populate(SAMPLES_PER_CLASS)
x_train, y_train = shuffle(x_train, y_train, random_state=SEED)

#creating a keras model using mostly convolution networks and dropouts
model = Sequential([
    # 64x64 grayscale images as produced by populate(); Reshape adds the
    # single channel axis that Conv2D requires
    Input((64, 64)),
    Reshape((64, 64, 1)),
    Conv2D(64, kernel_size=4, strides=1, activation='relu'),
    Conv2D(64, kernel_size=4, strides=2, activation='relu'),
    Dropout(0.5),
    Conv2D(128, kernel_size=4, strides=1, activation='relu'),
    Conv2D(128, kernel_size=4, strides=2, activation='relu'),
    Dropout(0.5),
    Conv2D(256, kernel_size=4, strides=1, activation='relu'),
    Conv2D(256, kernel_size=4, strides=2, activation='relu'),
    Flatten(),
    Dropout(0.5),
    Dense(43, activation='relu'),
    # one softmax probability per label
    Dense(NUM_CLASSES, activation='softmax')
])

# sparse loss: y_train holds integer labels, not one-hot vectors
model.compile(loss=sparse_categorical_crossentropy, optimizer=Adam(0.001), metrics=['acc'])

model.summary()

35
34
33
32
31
30
29
28
27
26
25
24
23
22
21
20
19
18
17
16
15
14
13
12
11
10
9
8
7
6
5
4
3
2
1
0
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 64, 64, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 61, 61, 64)        1088      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 29, 29, 64)        65600     
_________________________________________________________________
dropout (Dropout)            (None, 29, 29, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 26, 128)       131200    
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 12, 12, 128)       262272    
________________________

In [4]:
# Train for 5 epochs, holding out 10% of the samples for validation.
# Only the per-epoch metrics (the History object's .history attribute) are kept.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    validation_split=0.1,
).history

Train on 4860 samples, validate on 540 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [5]:
#feeding an image to the model and making a prediction

def predict(img):
    """Classify one character image with the trained global ``model``.

    Args:
        img: single-channel image array of any size, on the same 0-255
            intensity scale as the raw training images.

    Returns:
        The predicted character as a one-character string.
    """
    new_dim = (64, 64)
    img = cv2.resize(img, new_dim, interpolation=cv2.INTER_AREA)

    # populate() divides the training images by 255; the model must see
    # inference inputs on that same scale
    img = img / 255

    #input expects shape (1, 64, 64) so newaxis is needed to change shape
    batch = img[newaxis, :, :]

    # Sequential.predict_classes() is not available in newer Keras releases;
    # argmax over the softmax output yields the same class index
    probabilities = model.predict(batch, verbose=0)
    label = int(np.argmax(probabilities, axis=-1)[0])

    #adding 48 since we removed 48 to reduce output possibilities
    return chr(label + 48)

In [13]:
#loading a file and performing preprocessing on it. greyscale -> thresholding -> bilateral filtering

filename = 'cat.jpeg'

img = cv2.imread(filename)
# cv2.imread reports a missing or undecodable file by returning None, which
# would otherwise only surface as an obscure error inside cvtColor
if img is None:
    raise FileNotFoundError("could not read image: " + filename)

imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# pixels brighter than 127 become 255, everything else 0
ret, thresh = cv2.threshold(imgray, 127, 255, cv2.THRESH_BINARY)

bilateral = cv2.bilateralFilter(thresh, 15, 75, 75)

#getting contours
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); taking the last two values works with all of them
contours, hierarchy = cv2.findContours(bilateral, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]


In [14]:
#iterating through each contour
img_h, img_w = bilateral.shape[:2]

for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)

    #disregarding very small contours (skipped only when both sides are under 35 px)
    if w < 35 and h < 35:
        continue

    #adding padding to the bounds: a third of the box size on every side
    left = int(x - (w / 3))
    top = int(y - (h / 3))
    right = left + int(w + (w / 3) * 2)
    bottom = top + int(h + (h / 3) * 2)

    # Keep the padded box inside the image: a negative start index would make
    # the numpy slice wrap around and yield an empty or wrong crop, which then
    # fails inside predict().
    left, top = max(left, 0), max(top, 0)
    right, bottom = min(right, img_w), min(bottom, img_h)

    #drawing rect boundaries around characters
    cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 2)
    crop_img = bilateral[top:bottom, left:right]

    text = predict(crop_img)

    #drawing the prediction near each character
    image = cv2.putText(img, text, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

#creating new file with predictions
cv2.imwrite('cat_prediction.jpeg', img)

True