## 0. Getting started

## 0.1 Import dependencies

In [1]:
import os
import cv2 as cv
import numpy as np
import tensorflow as tf
from alive_progress import alive_bar
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, MaxPooling2D, Dropout, BatchNormalization
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Model

## 0.2 Define constants

In [2]:
# Filesystem layout, relative to the notebook's working directory.
EXAMPLE_PATH = os.path.join('data', 'handwritten_ex')
DATA_PATH = os.path.join(
    'data', 'dataset', 'CompleteImages', 'All data (Compressed)'
)
TEST_DATA_PATH = os.path.join('data', 'test')
TRAIN_DATA_PATH = os.path.join('data', 'train')
MODELS_PATH = os.path.join('models', 'digit_cl')

# Size every character image is resized to before it reaches the classifier.
INPUT_IMAGE_SIZE = (28, 28)

# Class names; the position of a label in this list is its integer class id.
labels = [
    '%', '+', '-',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    '[', ']', '_',
]
N_CLASSES = 16

## 0.3 Set memory growth

In [3]:
# Turn on TensorFlow's memory-growth option for every GPU it reports, and
# print each device so the cell output shows which GPUs were configured.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
    print(gpu)

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


## 1. Implement a handwritten character detector

## 1.1 Handling bounding box intersections

In [4]:
# calculate Jaccard index (IoU) of bounding boxes A and B
def bb_iou(boxA, boxB):
    """Return the intersection-over-union of two ``(x, y, w, h)`` boxes.

    Both boxes use the ``(x, y, width, height)`` layout, with ``(x, y)`` the
    top-left corner. A box covers the half-open ranges ``[x, x + w)`` and
    ``[y, y + h)``, so boxes that merely touch along an edge do not overlap.

    Args:
        boxA: First box; only indices 0-3 are read.
        boxB: Second box; only indices 0-3 are read.

    Returns:
        A float in ``[0, 1]``; ``0.0`` when the boxes are disjoint or when
        both have zero area.
    """
    # Corners of the intersection rectangle.
    x_left = max(boxA[0], boxB[0])
    y_top = max(boxA[1], boxB[1])
    x_right = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
    y_bottom = min(boxA[1] + boxA[3], boxB[1] + boxB[3])

    # Clamp at zero so disjoint boxes give an empty intersection rather than
    # a negative width/height.
    inter_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)

    # Areas follow directly from width * height; the corner-style formula
    # (x2 - x1 + 1) does not apply to this box layout.
    union_area = boxA[2] * boxA[3] + boxB[2] * boxB[3] - inter_area
    if union_area <= 0:
        return 0.0
    return inter_area / float(union_area)


def bb_area(bb):
    """Return the area (width times height) of an ``(x, y, w, h)`` box."""
    _x, _y, width, height = bb
    return width * height


def detect_postprocess_bb(bounding_boxes):
    """Drop the smaller box of every overlapping pair.

    Every unordered pair of boxes is compared with ``bb_iou``. When the IoU
    is positive, the box with the smaller ``bb_area`` is marked for removal;
    on equal areas the first box of the pair is marked.

    Args:
        bounding_boxes: Sequence of ``(x, y, w, h)`` boxes.

    Returns:
        A new list with the unmarked boxes, in their original order.
    """
    n_boxes = len(bounding_boxes)
    discarded = set()
    for first in range(n_boxes - 1):
        for second in range(first + 1, n_boxes):
            overlap = bb_iou(bounding_boxes[first], bounding_boxes[second])
            if not overlap > 0:
                continue
            first_is_larger = (
                bb_area(bounding_boxes[first]) > bb_area(bounding_boxes[second])
            )
            discarded.add(second if first_is_larger else first)
    return [
        box for position, box in enumerate(bounding_boxes)
        if position not in discarded
    ]

## 1.2 Build detector

In [5]:
def detect_characters(image_path, kernel=(5, 5), show_results=False):
    """Find character bounding boxes in the image at *image_path*.

    The image is converted to grayscale, thresholded at 127, cleaned with a
    morphological closing, and its contours are extracted. Contours whose
    area lies strictly between 200 and 5000 become ``(x, y, w, h)`` boxes,
    which are then filtered by ``detect_postprocess_bb``.

    Args:
        image_path: Path of the image file to analyse.
        kernel: Either a ``(width, height)`` size, from which a rectangular
            structuring element is built, or a ready-made NumPy structuring
            element that is used as is.
        show_results: When true, draw the boxes on the image, show it and
            the cleaned binary image in windows, and wait for a key press.

    Returns:
        The boxes as a list sorted by their x coordinate.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv.imread(image_path)
    if img is None:
        # cv.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    _, binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)

    # morphologyEx expects a structuring-element array; a bare (5, 5) tuple
    # would be read as a tiny array holding the two values, not as a 5x5
    # neighbourhood.
    if isinstance(kernel, np.ndarray):
        structuring_element = kernel
    else:
        structuring_element = cv.getStructuringElement(cv.MORPH_RECT, tuple(kernel))
    closed = cv.morphologyEx(binary, cv.MORPH_CLOSE, structuring_element)

    contours, _ = cv.findContours(closed, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    bounding_boxes = [
        tuple(cv.boundingRect(contour))
        for contour in contours
        if 200 < cv.contourArea(contour) < 5000
    ]
    bounding_boxes = detect_postprocess_bb(bounding_boxes)

    if show_results:
        for (x, y, w, h) in bounding_boxes:
            cv.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv.imshow('Image', img)
        cv.imshow('Opening', closed)
        cv.waitKey(0)

    return sorted(bounding_boxes, key=lambda box: box[0])  # sorted by x value

In [6]:
# Names of the example images; the iterator lets each "Try it out" cell
# advance to the next file every time it is executed.
files = os.listdir(EXAMPLE_PATH)
it_files = iter(files)

In [None]:
#Try it out...
# Each run takes the next example file and shows the detected boxes;
# next() raises StopIteration once every file has been consumed.

file = next(it_files)
detect_characters(os.path.join(EXAMPLE_PATH, file), show_results=True)

## 2. Implement a handwritten character classifier

In [None]:
# Let's scale all bounding boxes to the same size!
# First, we need to assess how wide and how high should the bounding boxes be.
bb = []
for file in files:
    bb.extend(detect_characters(os.path.join(EXAMPLE_PATH, file)))

# Each box is (x, y, w, h): index 2 holds the width, index 3 the height.
widths = [box[2] for box in bb]
heights = [box[3] for box in bb]

wmin, wmax = min(widths), max(widths)
hmin, hmax = min(heights), max(heights)

print((wmin, wmax), (hmin, hmax))

## 2.1 Preparing datasets

In [None]:
def training_preprocess(image_path, label):
    """Load one training image and pair it with its label.

    The file is read, decoded to a single channel, resized to
    ``INPUT_IMAGE_SIZE`` and divided by 255, which is the same scaling
    ``preprocess_img`` applies to character crops at inference time.

    Args:
        image_path: Path of the image file.
        label: Class label, passed through unchanged.

    Returns:
        ``(image, label)``.
    """
    byte_image = tf.io.read_file(image_path)
    # channels=1: the classifiers are built with a single-channel input.
    # NOTE(review): the dataset cell globs "*.png"; confirm decode_jpeg
    # accepts those files on the TensorFlow version in use.
    img = tf.io.decode_jpeg(byte_image, channels=1)
    img = tf.image.resize(img, INPUT_IMAGE_SIZE)
    # Without this, training inputs would be on a 255x larger scale than the
    # crops the model sees at inference time.
    img = img / 255.0
    return img, label

In [None]:
#Get datasets...
# Builds a train and a test dataset with up to class_ds_size images per class,
# split 70/30 per class, and saves both to disk.
class_ds_size = 20000
train_fraction = .7
test_fraction = .3
split_seed = 0  # fixed seed so the split is the same on every run

train_pairs = None
test_pairs = None
for idx, label in enumerate(labels):
    # shuffle=False plus a seeded, non-reshuffling shuffle: the file order
    # must be identical every time the dataset is iterated, otherwise take()
    # and skip() below draw from different orderings and the train and test
    # sets overlap.
    ds = tf.data.Dataset.list_files(os.path.join(DATA_PATH, label, "*.png"), shuffle=False)
    ds = ds.shuffle(len(ds), seed=split_seed, reshuffle_each_iteration=False)
    ds = ds.take(class_ds_size)
    labelling = tf.data.Dataset.from_tensor_slices(tf.ones(len(ds)) * idx, name='labels')
    data = tf.data.Dataset.zip((ds, labelling))

    n_train = int(len(data) * train_fraction)
    n_test = int(len(data) * test_fraction)
    train_part = data.take(n_train)
    test_part = data.skip(n_train).take(n_test)

    if train_pairs is None:
        train_pairs, test_pairs = train_part, test_part
    else:
        train_pairs = train_pairs.concatenate(train_part)
        test_pairs = test_pairs.concatenate(test_part)

# The pieces were concatenated class by class; mix the (path, label) pairs
# once so consecutive training samples do not all share one label. Shuffling
# before decoding keeps the shuffle buffer small (paths, not images).
train_pairs = train_pairs.shuffle(len(train_pairs), seed=split_seed, reshuffle_each_iteration=False)

train_data = train_pairs.map(training_preprocess)
test_data = test_pairs.map(training_preprocess)

# No .cache() here: its return value was never used, and both datasets are
# written to disk right away.
tf.data.experimental.save(train_data, TRAIN_DATA_PATH)
tf.data.experimental.save(test_data, TEST_DATA_PATH)
print(f'Train data length:{len(train_data)}, test data length: {len(test_data)}')

## 2.2 Image preprocessing and getting image slices

In [None]:
# dataset : https://www.kaggle.com/michelheusser/handwritten-digits-and-operators
def preprocess_img(image):
    """Resize *image* to ``INPUT_IMAGE_SIZE`` and divide its values by 255.

    The resize goes through ``tf.image.resize`` (no padding is applied) and
    the result is returned as a NumPy array.
    """
    resized = tf.image.resize(image, INPUT_IMAGE_SIZE)
    return np.array(resized) / 255.0  # normalization

def get_digit_images(image_path, kernel=(5, 5)):
    """Cut every detected character out of the image and preprocess it.

    Boxes come from ``detect_characters`` (already sorted by x). The image is
    read again, converted to grayscale and thresholded at 127; each box is
    cropped from that binary image, given a trailing channel axis and passed
    through ``preprocess_img``.

    Returns:
        A list with one preprocessed image per box, in box order.
    """
    boxes = detect_characters(image_path, kernel=kernel)
    gray = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2GRAY)
    _, binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)

    crops = []
    for x, y, w, h in boxes:
        crop = binary[y:y + h, x:x + w]
        # (rows, cols) -> (rows, cols, 1): add the channel axis.
        with_channel = crop.reshape(crop.shape[0], crop.shape[1], 1)
        crops.append(preprocess_img(with_channel))
    return crops

In [None]:
#Try it out...
# Takes the next example file, shows every extracted character image in its
# own window, then waits for a key press.

file = next(it_files)
images = get_digit_images(os.path.join(EXAMPLE_PATH, file))
for i, img in enumerate(images):
    cv.imshow(f'Image {i}',img)
cv.waitKey(0)

## 2.3 Define the model (and its variant in this case)

In [None]:
def create_digit_classifier():
    """Build the small convolutional character classifier.

    Three Conv2D + MaxPooling2D stages are followed by a flatten and a
    softmax layer with ``N_CLASSES`` outputs. The input is a single-channel
    image of size ``INPUT_IMAGE_SIZE``.

    Returns:
        An uncompiled ``Model`` named ``'digit_classifier'``.
    """
    in_size = INPUT_IMAGE_SIZE
    inputs = Input(shape=(in_size[0], in_size[1], 1), name='input')

    # MaxPooling2D's first positional argument is pool_size, not a filter
    # count: passing 64 there made every window span the whole feature map.
    # A 2x2 window (stride defaults to the pool size) keeps the same output
    # shapes as before.
    conv_1 = Conv2D(64, (5, 5), activation='relu')(inputs)
    max_1 = MaxPooling2D(pool_size=(2, 2), padding='same')(conv_1)

    conv_2 = Conv2D(32, (6, 6), activation='relu')(max_1)
    max_2 = MaxPooling2D(pool_size=(2, 2), padding='same')(conv_2)

    conv_3 = Conv2D(16, (4, 4), activation='relu')(max_2)
    max_3 = MaxPooling2D(pool_size=(2, 2), padding='same')(conv_3)

    flat_1 = Flatten()(max_3)
    dense_1 = Dense(N_CLASSES, activation='softmax')(flat_1)

    return Model(inputs=[inputs], outputs=[dense_1], name='digit_classifier')

def create_deeper_digit_classifier():
    """Build the deeper, regularised variant of the character classifier.

    Four Conv2D + MaxPooling2D stages (with batch normalisation, dropout and
    L2 regularisers) feed three ReLU dense layers and a softmax layer with
    ``N_CLASSES`` outputs. The input is a single-channel image of size
    ``INPUT_IMAGE_SIZE``.

    Returns:
        An uncompiled ``Model`` named ``'digit_classifier'``.
    """
    in_size = INPUT_IMAGE_SIZE
    inputs = Input(shape=(in_size[0], in_size[1], 1), name='input')

    # MaxPooling2D's first positional argument is pool_size, not a filter
    # count: passing 64 or 32 there made every window span the whole feature
    # map. A 2x2 window (stride defaults to the pool size) keeps the same
    # output shapes as before.
    conv_1 = Conv2D(64, (4, 4), activation='relu', padding='same',
                    kernel_regularizer=tf.keras.regularizers.l2(1e-4))(inputs)
    max_1 = MaxPooling2D(pool_size=(2, 2), padding='same')(conv_1)

    b_norm_1 = BatchNormalization(momentum=0.8)(max_1)

    conv_2 = Conv2D(64, (3, 3), activation='relu',
                    kernel_regularizer=tf.keras.regularizers.l2(1e-5))(b_norm_1)
    max_2 = MaxPooling2D(pool_size=(2, 2), padding='same')(conv_2)
    do_1 = Dropout(0.12)(max_2)

    conv_3 = Conv2D(128, (4, 4), activation='relu', padding='same',
                    bias_regularizer=tf.keras.regularizers.l2(5e-5),
                    kernel_regularizer=tf.keras.regularizers.l2(1e-5))(do_1)
    max_3 = MaxPooling2D(pool_size=(2, 2), padding='same')(conv_3)
    do_2 = Dropout(0.2)(max_3)

    conv_4 = Conv2D(128, (2, 2), activation='relu',
                    kernel_regularizer=tf.keras.regularizers.l2(1e-5))(do_2)
    max_4 = MaxPooling2D(pool_size=(2, 2), padding='same')(conv_4)
    do_3 = Dropout(0.2)(max_4)

    flat = Flatten()(do_3)
    dense_1 = Dense(512, activation='relu',
                    bias_regularizer=tf.keras.regularizers.l2(1e-4),
                    activity_regularizer=tf.keras.regularizers.l2(1e-5))(flat)

    dense_2 = Dense(128, activation='relu',
                    bias_regularizer=tf.keras.regularizers.l2(1e-5))(dense_1)
    b_norm_2 = BatchNormalization(momentum=0.8)(dense_2)

    dense_3 = Dense(64, activation='relu',
                    bias_regularizer=tf.keras.regularizers.l2(1e-5))(b_norm_2)
    do_4 = Dropout(0.2)(dense_3)

    dense_4 = Dense(N_CLASSES, activation='softmax',
                    bias_regularizer=tf.keras.regularizers.l2(1e-5),
                    activity_regularizer=tf.keras.regularizers.l2(1e-5))(do_4)

    return Model(inputs=[inputs], outputs=[dense_4], name='digit_classifier')

## 2.4 Train the model

In [None]:
@tf.function
def train_step(model, batch, loss, optimizer):
    """Run one optimisation step on *batch* and return the loss value.

    Args:
        model: Model called with ``training=True`` on the batch inputs.
        batch: Indexable pair; element 0 holds the inputs, element 1 the
            true labels.
        loss: Callable taking ``(y_true, y_pred)``.
        optimizer: Optimizer whose ``apply_gradients`` updates the model's
            trainable variables.

    Returns:
        The loss computed for this batch.
    """
    features, targets = batch[0], batch[1]
    # Only the forward pass needs to be recorded by the tape.
    with tf.GradientTape() as tape:
        predictions = model(features, training=True)
        loss_v = loss(targets, predictions)
    # NOTE(review): only the value returned by `loss` is optimised here;
    # nothing from model.losses is added — confirm that is intended.
    variables = model.trainable_variables
    gradients = tape.gradient(loss_v, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return loss_v

def train(data, epochs, lr = 5e-5):
    """Train a fresh ``create_digit_classifier`` model on *data*.

    The data is batched in groups of 100 and prefetched. The model is trained
    with sparse categorical cross-entropy and Adam; a checkpoint is written
    under ``./checkpoints`` every tenth epoch, and the final model is saved
    as ``digit_classifier.h5`` in ``MODELS_PATH``.

    Args:
        data: Unbatched dataset whose elements ``train_step`` can consume.
        epochs: Number of passes over the data.
        lr: Learning rate handed to Adam.

    Returns:
        The trained model.
    """
    batches = data.batch(100).prefetch(20)  # prefetch to prevent bottlenecking

    model = create_digit_classifier()
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam(lr)

    checkpoint = tf.train.Checkpoint(opt=optimizer, model=model)
    checkpoint_prefix = os.path.join("./checkpoints", 'chkpt')

    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")
        checkpoint_due = epoch % 10 == 0
        with alive_bar(len(batches)) as bar:
            for batch in batches:
                train_step(model, batch, loss, optimizer)
                bar()
            if checkpoint_due:
                checkpoint.save(file_prefix=checkpoint_prefix)

    model_file = os.path.join(MODELS_PATH, 'digit_classifier.h5')
    model.save(model_file)
    return model

In [None]:
def test(model, data):
    """Evaluate *model* on *data* and print running precision and recall.

    Each dataset element is an ``(image, label)`` pair. Labels are one-hot
    encoded and compared with the predicted class probabilities at a 0.5
    threshold, so the metrics are pooled over all classes. The predicted
    class printed for each sample is the arg-max of its probability row.

    Args:
        model: Classifier whose ``predict`` returns one probability row per
            sample.
        data: Unbatched dataset of ``(image, label)`` pairs.

    Returns:
        ``(precision, recall)`` accumulated over the whole dataset.
    """
    data = data.batch(16)
    data = data.prefetch(8)
    recall = Recall(thresholds=0.5)
    precision = Precision(thresholds=0.5)
    for idx, batch in enumerate(data):
        # A batch is an (images, labels) pair; there is no third element.
        X, y_true = batch[0], batch[1]
        y_out = model.predict(X)
        y_pred = np.argmax(y_out, axis=1)
        # One-hot labels line up with the per-class probability columns.
        y_true_one_hot = tf.one_hot(tf.cast(y_true, tf.int32), depth=y_out.shape[-1])
        recall.update_state(y_true_one_hot, y_out)
        precision.update_state(y_true_one_hot, y_out)
        print(f'Batch {idx}')
        print(f'True: {y_true}\nPred: {y_pred}')
        print(f'Precision: {precision.result().numpy()}, recall: {recall.result().numpy()}\n')
    return precision.result().numpy(), recall.result().numpy()

def load_latest_model():
    """Load the model stored as ``digit_classifier.h5`` under ``MODELS_PATH``."""
    model_file = os.path.join(MODELS_PATH, 'digit_classifier.h5')
    return tf.keras.models.load_model(model_file)

def load_best_model():
    """Load the model stored as ``digit_classifier_best.h5`` under ``MODELS_PATH``."""
    model_file = os.path.join(MODELS_PATH, 'digit_classifier_best.h5')
    return tf.keras.models.load_model(model_file)

def get_expression(model, image_path, kernel=(5, 5)):
    """Read the handwritten expression in an image as a string of labels.

    Character images come from ``get_digit_images``. They are stacked into
    one batch, classified, and each predicted class id is mapped to its entry
    in ``labels``.

    Args:
        model: Classifier whose ``predict`` returns one probability row per
            character image.
        image_path: Path of the image to read.
        kernel: Passed on to ``get_digit_images``.

    Returns:
        The recognised expression; an empty string when no character was
        detected.
    """
    digit_images = get_digit_images(image_path, kernel)
    if not digit_images:
        # Nothing to classify; avoid calling predict on an empty batch.
        return ""
    # Stack into a single (n_chars, ...) array: a plain Python list of arrays
    # is not one batch tensor.
    batch = np.stack(digit_images)
    y_out = model.predict(batch)
    y_pred = tf.math.argmax(y_out, axis=1)
    return "".join(labels[int(y)] for y in y_pred)

## 3. Implement a solver

In [None]:
def op(digit1, operator, digit2):
    """Apply a binary arithmetic operator to two operands.

    Args:
        digit1: Left operand; converted with ``float``.
        operator: One of ``'_'`` (multiply), ``'%'`` (divide), ``'+'`` or
            ``'-'``.
        digit2: Right operand; converted with ``float``.

    Returns:
        The result as a float.

    Raises:
        ValueError: If an operand cannot be converted to float or the
            operator is not one of the four supported symbols.
        ZeroDivisionError: On division by zero.
    """
    digit1, digit2 = float(digit1), float(digit2)
    if operator == '_':
        return digit1 * digit2
    if operator == '%':
        return digit1 / digit2
    if operator == '+':
        return digit1 + digit2
    if operator == '-':
        return digit1 - digit2
    # Previously an unknown symbol fell through to an unbound local.
    raise ValueError(f"Unknown operator: {operator!r}")


def is_operator(char):
    """Return True when *char* is one of the symbols ``+``, ``-``, ``%``, ``_``."""
    return char in ('+', '-', '%', '_')


def is_float(slice):
    """Return True when *slice* is an unsigned number that ``float`` accepts.

    Text whose first character is an operator symbol is rejected up front, so
    a leading ``+`` or ``-`` never counts as part of the number.
    """
    if not is_operator(slice[0]):
        try:
            float(slice)
        except ValueError:
            return False
        return True
    return False


def numbers_op(exp: str, op: str):
    """Split *exp* around the first occurrence of *op* and its two operands.

    Starting at the operator, the left operand is grown one character at a
    time towards the start of the string, and the right operand one character
    at a time towards the end, for as long as ``is_float`` accepts the
    candidate text.

    Args:
        exp: Expression text.
        op: Operator symbol to look for. Inside this function the name
            shadows the module-level ``op`` function.

    Returns:
        A 3-tuple of substrings of *exp*: everything before the left operand,
        the ``operand op operand`` core, and everything after the right
        operand.

    NOTE(review): when no operand is found on a side, ``start_ind`` or
    ``end_ind`` keeps its initial -1 and the slices below are taken with that
    value — confirm callers never reach that case.
    """
    op_ind = exp.find(op)
    start_ind = -1
    end_ind = -1
    # Grow the left operand: exp[i:op_ind] gains one character per step.
    for ind in range(1, op_ind + 1):
        i = op_ind - ind
        slice = exp[i:op_ind]
        if is_float(slice):
            start_ind = i
        else:
            break
    # Grow the right operand: exp[op_ind + 1:i + 1] gains one character per step.
    for i in range(op_ind + 1, len(exp)):
        slice = exp[op_ind + 1:i + 1]
        if is_float(slice):
            end_ind = i
        else:
            break
    return exp[:start_ind], exp[start_ind:end_ind + 1], exp[end_ind + 1:]


def branch(exp, operator):
    """Evaluate *exp* by first solving the sub-expression around *operator*.

    ``numbers_op`` splits *exp* into a prefix, an ``operand operator operand``
    core and a suffix. The core is solved first. A non-empty prefix is solved
    without its last character, which is used as the operator joining it to
    the core; a non-empty suffix is solved without its first character, which
    is used as the operator joining the running result to it.

    NOTE(review): the suffix is reduced to a single value before it is
    combined, so a chain such as ``a-b-c-d`` is computed as
    ``(a-b) - (c-d)`` — verify this is intended.
    """
    f, s, t = numbers_op(exp, operator)
    res = my_solver(s)
    if f != "":
        # f ends with the operator that joins the prefix to the core.
        first = my_solver(f[:-1])
        res = op(first, f[-1], res)
    if t != "":
        # t starts with the operator that joins the core to the suffix.
        third = my_solver(t[1:])
        res = op(res, t[0], third)
    return res


def terminal(exp):
    """Return the operator of a single-operation, bracket-free expression.

    When *exp* contains exactly one operator symbol (``+``, ``-``, ``%`` or
    ``_``) and neither ``[`` nor ``]``, that symbol is returned; otherwise
    the result is None.
    """
    operators_present = [ch for ch in exp if ch in ('+', '-', '%', '_')]
    has_brackets = '[' in exp or ']' in exp
    if len(operators_present) == 1 and not has_brackets:
        return operators_present[0]
    return None


def remove_brackets(exp):
    """Replace every bracketed group in *exp* with its evaluated value.

    The first ``]`` and the closest ``[`` before it always delimit an
    innermost group. That group is solved with ``my_solver``, its text
    (brackets included) is replaced by ``str`` of the result, and the search
    starts again until no such pair is left.

    Args:
        exp: Expression using ``[`` and ``]`` as brackets.

    Returns:
        The expression with all matched bracket groups resolved. Text with
        unmatched brackets is returned with those brackets still in place.
    """
    while True:
        closed_ind = exp.find(']')
        if closed_ind == -1:
            break
        # Search for '[' only before this ']'. An unbounded rfind can return
        # a '[' that belongs to a later group, which produced an empty slice
        # and a loop that never terminated.
        open_ind = exp.rfind('[', 0, closed_ind)
        if open_ind == -1:
            break
        bracket_exp = exp[open_ind + 1:closed_ind]
        exp = exp[:open_ind] + str(my_solver(bracket_exp)) + exp[closed_ind + 1:]
    return exp


def _tokenize_expression(exp):
    """Split a bracket-free expression into alternating numbers and operators."""
    tokens = []
    pos, end = 0, len(exp)
    expect_number = True
    while pos < end:
        ch = exp[pos]
        if ch.isspace():
            pos += 1
            continue
        if not expect_number:
            if ch not in ('+', '-', '%', '_'):
                raise ValueError(f"Unexpected character {ch!r} in expression {exp!r}")
            tokens.append(ch)
            pos += 1
            expect_number = True
            continue
        # Leading sign(s): needed for negative intermediate results such as
        # "-2.0_2", which appear once a bracket group has been evaluated.
        sign = 1
        while pos < end and exp[pos] in ('+', '-'):
            if exp[pos] == '-':
                sign = -sign
            pos += 1
        start = pos
        while pos < end and (exp[pos].isdigit() or exp[pos] == '.'):
            pos += 1
        # Optional exponent, as str(float) writes for very large or small
        # values (e.g. "1e-05").
        if start < pos < end and exp[pos] in ('e', 'E'):
            probe = pos + 1
            if probe < end and exp[probe] in ('+', '-'):
                probe += 1
            if probe < end and exp[probe].isdigit():
                while probe < end and exp[probe].isdigit():
                    probe += 1
                pos = probe
        text = exp[start:pos]
        # int() keeps plain integers as ints; float() raises ValueError for
        # malformed text such as '' or '1.2.3'.
        tokens.append(sign * (int(text) if text.isdigit() else float(text)))
        expect_number = False
    if expect_number:
        raise ValueError(f"Expression {exp!r} is incomplete")
    return tokens


def my_solver(exp: str):
    """Evaluate a bracket-free arithmetic expression.

    Supported operators are ``_`` (multiply), ``%`` (divide), ``+`` and
    ``-``. Multiplication and division bind tighter than addition and
    subtraction, and operators of equal precedence are applied left to
    right. Numbers may carry a leading sign and a decimal part.

    Args:
        exp: Expression text without brackets.

    Returns:
        None for an empty string; an int for a lone integer literal; a float
        for everything else.

    Raises:
        ValueError: If *exp* is not a well-formed expression.
        ZeroDivisionError: On division by zero.
    """
    if exp == "":
        return None
    tokens = _tokenize_expression(exp)

    # Pass 1: fold multiplications and divisions into their left operand.
    terms = [tokens[0]]
    additive_ops = []
    for pos in range(1, len(tokens), 2):
        operator, operand = tokens[pos], tokens[pos + 1]
        if operator == '_':
            terms[-1] = float(terms[-1]) * float(operand)
        elif operator == '%':
            terms[-1] = float(terms[-1]) / float(operand)
        else:
            additive_ops.append(operator)
            terms.append(operand)

    # Pass 2: apply additions and subtractions left to right.
    result = terms[0]
    for operator, term in zip(additive_ops, terms[1:]):
        if operator == '+':
            result = float(result) + float(term)
        else:
            result = float(result) - float(term)
    return result

In [None]:
def uglify(exp):
    """Convert conventional notation to the classifier's label alphabet.

    Maps ``/`` to ``%``, ``*`` to ``_``, ``(`` to ``[`` and ``)`` to ``]``;
    all other characters are left as they are.
    """
    to_label_symbols = str.maketrans('/*()', '%_[]')
    return exp.translate(to_label_symbols)


def pretty(exp):
    """Convert the classifier's label alphabet to conventional notation.

    Maps ``%`` to ``/``, ``_`` to ``*``, ``[`` to ``(`` and ``]`` to ``)``;
    all other characters are left as they are.
    """
    to_display_symbols = str.maketrans('%_[]', '/*()')
    return exp.translate(to_display_symbols)


def evaluate_expression(exp):
    """Evaluate *exp*: resolve bracketed groups first, then solve what is left."""
    without_brackets = remove_brackets(exp)
    return my_solver(without_brackets)


In [None]:
def solve_and_draw(model, image_path, kernel=(5, 5)):
    """Recognise, evaluate and display the expression found in an image.

    A rectangle is drawn around all detected characters, the recognised
    expression is written above its top-left corner and, when it can be
    evaluated, the result is written next to its top-right corner. The
    annotated image is shown in a window until a key is pressed.

    Args:
        model: Classifier passed on to ``get_expression``.
        image_path: Path of the image to process.
        kernel: Passed on to ``detect_characters`` and ``get_expression``.
    """
    bbs = detect_characters(image_path, kernel=kernel)
    if not bbs:
        # No boxes means there is no extent to compute and nothing to draw.
        print("No characters detected")
        return

    # Plain Python ints for the drawing calls; some OpenCV builds reject
    # NumPy integer scalars as point coordinates.
    x_min = int(min(x for (x, _, _, _) in bbs))
    y_min = int(min(y for (_, y, _, _) in bbs))
    x_max = int(max(x + w for (x, _, w, _) in bbs))
    y_max = int(max(y + h for (_, y, _, h) in bbs))

    exp = get_expression(model, image_path, kernel=kernel)
    img = cv.imread(image_path)
    cv.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)
    cv.putText(img, str(pretty(exp)), (x_min - 5, y_min - 5), cv.FONT_HERSHEY_SIMPLEX, 1.0, (200, 15, 0), 1)
    try:
        result_text = str(evaluate_expression(exp))
    except (ValueError, ZeroDivisionError, TypeError):
        # A misread expression can be malformed or divide by zero; the image
        # is still shown, just without a result.
        print("Unable to evaluate expression")
    else:
        cv.putText(img, result_text, (x_max + 5, y_min - 5), cv.FONT_HERSHEY_SIMPLEX, 1.0,
                   (200, 15, 0), 1)
    cv.imshow('Expression', img)
    cv.waitKey(0)
