In [17]:
import cv2
import imutils
from imutils import paths
import numpy as np
import os

def normalize_dimensions(image, desired_width=20, desired_height=20):
    """Fit ``image`` into a ``desired_width`` x ``desired_height`` canvas.

    The image is scaled (aspect ratio preserved by ``imutils.resize``) so
    that it fits inside the target box, then padded with white pixels so the
    result has exactly the requested size.

    Args:
        image: Array with at least two dimensions, read as (height, width, ...).
        desired_width: Width of the returned image in pixels.
        desired_height: Height of the returned image in pixels.

    Returns:
        The resized and white-padded image of size
        ``desired_height`` x ``desired_width``.
    """
    (h, w) = image.shape[:2]

    # Scale along whichever side is the limiting one for the target box.
    # Cross-multiplying compares desired_width / w with desired_height / h
    # without floats; for a square target this reduces to ``w > h``.
    if desired_width * h < desired_height * w:
        image = imutils.resize(image, width=desired_width)
    else:
        image = imutils.resize(image, height=desired_height)

    # Distribute the missing pixels over both sides. When the gap is odd the
    # extra pixel goes to the right/bottom so the padded image already has
    # the exact target size instead of being stretched by the resize below.
    missing_width = desired_width - image.shape[1]
    missing_height = desired_height - image.shape[0]
    left = missing_width // 2
    right = missing_width - left
    top = missing_height // 2
    bottom = missing_height - top

    WHITE = [255, 255, 255]
    image_with_border = cv2.copyMakeBorder(
        image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=WHITE
    )

    # Defensive final resize: guarantees the output size even if the steps
    # above ever end up off by a pixel.
    image_with_border_resized = cv2.resize(
        image_with_border,
        (desired_width, desired_height),
        interpolation=cv2.INTER_AREA,
    )
    return image_with_border_resized
    

In [18]:
images = []
labels = []

output_folder = "dataset/char_images"

# Sort the paths so the order of the dataset does not depend on the order in
# which the directory listing happens to be returned.
for path in sorted(paths.list_images(output_folder)):
    image = cv2.imread(path)
    # cv2.imread returns None instead of raising when a file cannot be
    # decoded; fail here with the offending path rather than later with an
    # AttributeError inside normalize_dimensions.
    if image is None:
        raise ValueError(f"Could not read image file: {path}")

    images.append(normalize_dimensions(image))

    # The name of the directory containing the file is used as its label.
    label = path.split(os.path.sep)[-2]
    labels.append(label)

In [19]:
# Stack the normalized images into one float array, then rescale every value
# by 1/255 (presumably mapping 8-bit pixel intensities into [0, 1]).
X = np.array(images, dtype="float")
X /= 255.0

# Labels become an array as well so they can be handed to scikit-learn.
labels = np.array(labels)

(20, 20, 3)

In [20]:
from sklearn.preprocessing import LabelBinarizer

# Fit the encoder on the labels and encode them in a single step; the fitted
# encoder is kept so the same mapping can be reused later.
label_binarizer = LabelBinarizer()
y = label_binarizer.fit_transform(labels)

In [26]:
from tensorflow.keras import backend
from keras.models import Sequential
# Import layers from the public ``keras.layers`` namespace rather than the
# legacy ``keras.layers.convolutional`` / ``keras.layers.core`` submodules.
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Number of distinct labels; used as the size of the network's output layer.
num_classes = len(set(labels))

In [27]:
CNN_model = Sequential()

# First convolution + pooling stage. Only this first layer declares the
# input shape: 20x20 images with 3 channels.
CNN_model.add(
    Conv2D(20, (5, 5), padding="same", input_shape=(20, 20, 3), activation="relu"))
CNN_model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

# Second convolution + pooling stage. Its input shape follows from the
# previous layer, so the copy-pasted ``input_shape`` argument is dropped.
CNN_model.add(
    Conv2D(20, (5, 5), padding="same", activation="relu"))
CNN_model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

# Classifier head: flatten the feature maps, one hidden dense layer, then a
# softmax output with one unit per class.
CNN_model.add(Flatten())
CNN_model.add(Dense(512, activation="relu"))
CNN_model.add(Dense(num_classes, activation="softmax"))

In [28]:
# Configure training (Adam optimizer, categorical cross-entropy loss,
# accuracy metric) and print the layer-by-layer summary.
CNN_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
CNN_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 20, 20, 20)        1520      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 10, 10, 20)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 10, 10, 20)        10020     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 5, 5, 20)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 500)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 512)               256512    
_________________________________________________________________
dense_5 (Dense)              (None, 33)               

In [29]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples. A fixed random_state makes the shuffle, and
# therefore the split, identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

In [30]:
# Train for 5 epochs in batches of 32; the held-out split is passed as
# validation data so it is evaluated after every epoch.
CNN_model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7ff2dd2271f0>

In [32]:
import pickle

# Persist the fitted label encoder. The ``with`` block closes the file
# handle, which guarantees the pickle is flushed to disk.
with open("binarizer.pkl", "wb") as binarizer_file:
    pickle.dump(label_binarizer, binarizer_file)

# Only the weights are written, so loading them requires rebuilding a model
# with the same architecture first.
CNN_model.save_weights("weights.h5")