## 0. Getting started

## 0.1 Import dependencies

In [2]:
import cv2 as cv
import tensorflow as tf
import numpy as np
import os
from alive_progress import alive_bar
from tensorflow.keras.layers import Input, Layer, Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Model

## 0.2 Define path constants

In [None]:
# Folder containing the example images of handwritten input; this is the
# directory that gets listed and fed to the character detector.
EXAMPLE_PATH = os.path.join(
    'data',
    'handwritten_ex',
)

# Folder name reserved for models (presumably trained classifier weights --
# TODO confirm once the classifier section is implemented).
MODELS_PATH = 'models'

## 1. Implement a handwritten character detector

In [40]:
def bb_iou(boxA, boxB):
    """Return the Jaccard index (intersection over union) of two boxes.

    Both boxes are ``(x, y, w, h)`` tuples -- top-left corner plus width and
    height.  This is the layout returned by ``cv.boundingRect`` and the one
    ``bb_area`` unpacks, so all box helpers agree on a single format.

    Args:
        boxA: First box as ``(x, y, w, h)``.
        boxB: Second box as ``(x, y, w, h)``.

    Returns:
        A float in ``[0.0, 1.0]``.  ``0.0`` is returned for disjoint boxes
        (including boxes that merely share an edge) and when the union has
        no area, which avoids a division by zero for degenerate boxes.
    """
    ax, ay, aw, ah = boxA
    bx, by, bw, bh = boxB

    # Overlap along each axis: right-most left edge to left-most right edge,
    # clamped at zero when the boxes do not overlap on that axis.
    inter_w = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    inter_h = max(0, min(ay + ah, by + bh) - max(ay, by))
    inter_area = inter_w * inter_h

    union_area = aw * ah + bw * bh - inter_area
    if union_area <= 0:
        return 0.0

    return inter_area / float(union_area)

def bb_area(bb):
    """Return the area of a bounding box.

    ``bb`` must unpack into exactly four values; the last two are taken as
    the width and height, the first two are ignored.
    """
    _x, _y, width, height = bb
    area = width * height
    return area

def postprocess_bb(bounding_boxes, threshold):
    """Drop the smaller box of every pair that overlaps too strongly.

    Every unordered pair of boxes is scored with ``bb_iou``.  When the score
    exceeds ``threshold`` the box with the smaller ``bb_area`` is marked for
    removal; if the areas are equal the earlier box of the pair is marked.
    A box that has already been marked still takes part in the remaining
    comparisons.

    Args:
        bounding_boxes: Sequence of boxes accepted by ``bb_iou`` and
            ``bb_area``.
        threshold: IoU value above which two boxes are treated as
            duplicates of each other.

    Returns:
        A new list holding the surviving boxes in their original order.
    """
    # A set absorbs indices that are marked more than once and gives O(1)
    # membership tests when the survivors are collected below.
    suppressed = set()
    count = len(bounding_boxes)

    for i in range(count - 1):
        box_i = bounding_boxes[i]
        for j in range(i + 1, count):
            box_j = bounding_boxes[j]
            if bb_iou(box_i, box_j) > threshold:
                if bb_area(box_i) > bb_area(box_j):
                    suppressed.add(j)
                else:
                    suppressed.add(i)

    return [
        box for index, box in enumerate(bounding_boxes)
        if index not in suppressed
    ]

In [None]:
def detect_characters(image_path, kernel=(5, 5), show_results=False, threshold=0.1):
    """Locate candidate handwritten characters in an image.

    Pipeline: convert to greyscale, binarise with a fixed threshold of 127,
    apply a morphological closing, extract contours, keep those whose area
    lies strictly between 100 and 5000 pixels, and finally remove strongly
    overlapping boxes with ``postprocess_bb``.

    Args:
        image_path: Path of the image file to analyse.
        kernel: Either the ``(width, height)`` size of a rectangular
            structuring element, or a ready-made NumPy kernel array.
        show_results: When true, display the annotated image and the
            morphology result in OpenCV windows and block until a key is
            pressed.
        threshold: IoU threshold forwarded to ``postprocess_bb``.

    Returns:
        List of ``(x, y, w, h)`` bounding boxes as returned by
        ``cv.boundingRect``.

    Raises:
        ValueError: If the image cannot be read from ``image_path``.
    """
    img = cv.imread(image_path)
    if img is None:
        # cv.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque error in cvtColor.
        raise ValueError(f"Could not read image: {image_path!r}")

    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    _, binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)

    # morphologyEx expects a kernel *array*.  Handing it a bare size tuple
    # does not produce a kernel of that size, so build the structuring
    # element explicitly unless the caller already supplied an array.
    if isinstance(kernel, np.ndarray):
        structuring_element = kernel
    else:
        structuring_element = cv.getStructuringElement(cv.MORPH_RECT, tuple(kernel))

    closed = cv.morphologyEx(binary, cv.MORPH_CLOSE, structuring_element)
    contours, _ = cv.findContours(closed, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)

    # Discard specks and very large regions before computing boxes.
    bounding_boxes = [
        cv.boundingRect(contour)
        for contour in contours
        if 100 < cv.contourArea(contour) < 5000
    ]
    bounding_boxes = postprocess_bb(bounding_boxes, threshold)

    if show_results:
        for (x, y, w, h) in bounding_boxes:
            cv.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv.imshow('Image', img)
        # Window title kept as-is; the image shown is the closing result.
        cv.imshow('Opening', closed)
        cv.waitKey(0)
        # Close the preview windows so they do not linger unresponsive
        # once the key press has been handled.
        cv.destroyWindow('Image')
        cv.destroyWindow('Opening')

    return bounding_boxes

In [None]:
# os.listdir returns entries in arbitrary order; sorting makes stepping
# through the example files reproducible from one run to the next.
files = sorted(os.listdir(EXAMPLE_PATH))
# Iterator consumed one file at a time by the try-it-out cell.
it_files = iter(files)

In [None]:
# Try it out: every execution of this cell pulls the next example file from
# the iterator and shows the detector's output for it. next() raises
# StopIteration once all files have been consumed.

file = next(it_files)
detect_characters(
    os.path.join(EXAMPLE_PATH, file),
    show_results=True,
)

## 2. Implement a handwritten character classifier

## 3. Implement a solver