# Remove Boxes Demo

In [None]:
# Reload imported modules automatically before each cell runs, so edits to the
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

This notebook shows how to remove boxes from a form. Boxes can cause OCR misreads when the text overlaps the boundaries of the box.

In [None]:
import sys
import os
sys.path.append(".")
import matplotlib.pyplot as plt
from requests import get, post, delete
import time

import cv2
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
from IPython.display import display

%matplotlib inline 

# Both anchors are bare file names, so os.path.dirname() yields '' for each and
# the computed directory is the same one: the parent of the current working
# directory (presumably where the `common` and `Training` packages imported
# below live - confirm against the repository layout).
__root_common__ = 'common.py'
__train_file__ = 'autolabel_training.py'

for _anchor in (__root_common__, __train_file__):
    _parent_dir = os.path.abspath(os.path.join(os.path.dirname(_anchor), '..'))
    # Append only when missing so re-running the cell does not grow sys.path.
    if _parent_dir not in sys.path:
        sys.path.append(_parent_dir)

import numpy as np
from boxdetect import config
from common.common import compute_partial_ratio, compute_ratio, get_text_from_ocr, score_and_rank, \
apply_erosion, apply_dilatation, get_projection, load_image, find_runs, analyze_runs
from Training.Auto_Labelling.basic_implementation.autolabel_training import call_ocr

# Score accumulator: passed into score_and_rank() and replaced by its first
# return value on every scoring call later in the notebook.
best_score = {}

# NOTE(review): presumably the line thickness boxdetect uses when drawing the
# detected rectangles - confirm against the boxdetect config documentation.
config.thickness = 3
DATA_PATH = '../Data/'  # directory holding the input form and every image this notebook writes
file_name = 'Gedoc_1 (2).tiff'  # the scanned form that is passed to process_image and cropped below


In [None]:
# The scaling_factors parameter is the one that affects performance the most:
# - it defines all the scaling factors that should be used while processing to search for rectangles
# - the more factors, and the more diverse they are, the better
# - smaller values -> faster processing time
# - going below 0.4 is risky because the image can turn blurry and might generate more false positives
print("Default scaling factors: ", config.scaling_factors)


# Now we autodetect boxes on the form - this is a poor quality image

In [None]:
from boxdetect.pipelines import process_image

# Run box detection on the form. The four returned values are kept under the
# names used by the rest of the notebook; plotting is disabled here because the
# result is displayed explicitly in a later cell.
form_path = os.path.join(DATA_PATH, file_name)
rects, grouping_rects, img, output_image = process_image(form_path, config=config, plot=False)

## Each grouping rectangle is a large region enclosing a group of box rectangles
 Each one is given as (x, y, w, h)

In [None]:
# Display the grouping rectangles returned by process_image
grouping_rects

## Let's see what has been detected 

In [None]:
# Use a large figure so that the individual detected boxes are legible
plt.figure(figsize=(25,25))
plt.imshow(output_image)

# Here you can interactively extract each region that box detection has identified - use this to tune the sensitivity of the region extraction

In [None]:
from ipywidgets import interact

@interact(i=(0, len(grouping_rects)-1))
def extract_region(i):
    """Display the part of ``output_image`` covered by grouping rectangle ``i``.

    The rectangle is read as ``(x, y, w, h)``. The crop starts slightly
    above/left of the rectangle origin (controlled by ``window2``) and ends at
    the origin plus ``window1`` times the width/height.

    Args:
        i: Index into ``grouping_rects``; supplied by the ``interact`` slider.
    """
    window1 = 1   # Tweak these sensitivity values
    window2 = 0.01  # Tweak these sensitivity values

    rect = grouping_rects[i]
    x, y, w, h = rect[0], rect[1], rect[2], rect[3]

    y1 = int(y + (window1 * h))
    x1 = int(x + (window1 * w))
    # Clamp the start of the crop at 0: a negative slice start counts from the
    # end of the axis and would silently produce an empty or wrong region for
    # rectangles close to the top/left edge.
    y2 = max(0, int(y - (window2 * h)))
    x2 = max(0, int(x - (window2 * w)))

    roi = output_image[y2:y1, x2:x1]
    plt.imshow(roi)
    


# Let's drill down on the City field 

Experiment with the values for window1 and window2 below to fit the region snugly with the detected boxes

In [None]:
window1 = 1   # Tweak these sensitivity values
window2 = 0.01  # Tweak these sensitivity values

# Let's take the city field as an example
i = 1

# Crop bounds derived from the (x, y, w, h) grouping rectangle. The start
# indices are clamped at 0 because a negative slice start counts from the end
# of the axis and would give an empty or wrong crop near the top/left edge.
rect = grouping_rects[i]
y1 = int(rect[1] + (window1 * rect[3]))
y2 = max(0, int(rect[1] - (window2 * rect[3])))
x1 = int(rect[0] + (window1 * rect[2]))
x2 = max(0, int(rect[0] - (window2 * rect[2])))

# Crop from the form as stored on disk rather than from output_image, so the
# saved field is taken from the unmodified source file.
source_path = os.path.join(DATA_PATH, file_name)
image = cv2.imread(source_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising
    raise FileNotFoundError(f"Could not read image {source_path}")
roi = image[y2:y1, x2:x1]

# Let's save the image
active_file = 'city.jpg' 
saved = cv2.imwrite(os.path.join(DATA_PATH, active_file), roi)

assert saved, f"Failed to write {os.path.join(DATA_PATH, active_file)}"
print(f"{DATA_PATH + active_file} saved")

## Set up our environment variables

In [None]:
# We will set up some of the environment variables here and others later in the notebook to keep things clear
# Each value is taken from the environment variable of the same name when it is set;
# otherwise the default below is used. Export SUBSCRIPTION_KEY (or edit the default)
# before running the OCR cells.
class Config:
    """
    Settings for the OCR service, read from environment variables with fallbacks.
    """
    # The region Form Recognizer and OCR are deployed
    REGION = os.environ.get('REGION', 'eastus')
    # CogSvc key frautolabel | Keys and Endpoint - kept out of the notebook when
    # supplied through the environment
    SUBSCRIPTION_KEY = os.environ.get('SUBSCRIPTION_KEY', '')

    

# Let's call OCR and score against the Ground Truth - Enter the GT here

In [None]:
GT = 'ADD YOUR VALUE HERE'    # This is the ground truth value for the field

# OCR the saved field crop, then score the OCR output against the ground truth.
result = call_ocr(
    DATA_PATH,
    active_file,
    'en',
    Config.REGION,
    Config.SUBSCRIPTION_KEY,
    'image/jpeg',
)
best_score, top_score = score_and_rank(
    active_file,
    GT,
    result,
    best_score,
)

## Let's get our baseline score

In [None]:
# Keep the score of the first entry returned by score_and_rank for the untouched
# crop, so later transformations can be compared against it.
# NOTE(review): presumably top_score is ordered best-first and each entry is
# (file name, score) - confirm against score_and_rank.
baseline_score = top_score[0][1]
print(f"Baseline score {baseline_score}")

# Let's find the best performing image transformation

In [None]:
def _ocr_and_score(image_file, scores):
    """OCR ``image_file`` from DATA_PATH and score it under that same name.

    Using one name for both the OCR call and the scoring call keeps the score
    attributed to the image that was actually read.

    Args:
        image_file: File name (relative to DATA_PATH) of the image to OCR.
        scores: The running score accumulator handed to score_and_rank.

    Returns:
        The two values returned by score_and_rank (stored by the caller as
        ``best_score`` and ``top_score``).
    """
    ocr_result = call_ocr(DATA_PATH, image_file, 'en', Config.REGION, Config.SUBSCRIPTION_KEY, 'image/jpeg')
    return score_and_rank(image_file, GT, ocr_result, scores)


field_path = os.path.join(DATA_PATH, active_file)
image = cv2.imread(field_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising
    raise FileNotFoundError(f"Could not read image {field_path}")

# Inverted Otsu binarisation of the crop produced in the City-field cell
# (foreground becomes white, which is what the morphological opening expects).
gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove vertical
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 25))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Paint the detected vertical strokes white on the colour image
    cv2.drawContours(image, [c], -1, (255, 255, 255), 2)

# Repair image
repair_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 6))
repaired = 255 - cv2.morphologyEx(255 - image, cv2.MORPH_CLOSE, repair_kernel, iterations=1)

# Persist the intermediate images. 'roi.jpg' receives the repaired image so
# that the box removal is what gets sent to OCR, not the untouched crop.
for out_name, out_image in (('thresh.jpg', thresh),
                            ('detected.jpg', detected_lines),
                            ('roi.jpg', repaired)):
    saved = cv2.imwrite(os.path.join(DATA_PATH, out_name), out_image)
    assert saved, f"Failed to write {os.path.join(DATA_PATH, out_name)}"
    print(f"{DATA_PATH + out_name} saved")


active_file = 'roi.jpg'
best_score, top_score = _ocr_and_score(active_file, best_score)

inverted_active_file = 'thresh.jpg'
best_score, top_score = _ocr_and_score(inverted_active_file, best_score)

# Try 1 and 2 iterations of each morphological transformation
for i in range(1, 3):
    active_file = apply_dilatation(DATA_PATH, active_file, i)
    best_score, top_score = _ocr_and_score(active_file, best_score)

    active_file = apply_erosion(DATA_PATH, active_file, i)
    best_score, top_score = _ocr_and_score(active_file, best_score)

    # NOTE(review): active_file now names the eroded inverted image, so the
    # next iteration's dilatation starts from it (kept as in the original
    # flow) - confirm this chaining is intended.
    active_file = apply_erosion(DATA_PATH, inverted_active_file, i)
    best_score, top_score = _ocr_and_score(active_file, best_score)



In [None]:
# Compare the untouched crop's score with the first entry of the latest ranking.
# NOTE(review): presumably top_score[0] is the best (file name, score) pair - confirm
# against score_and_rank.
print(f"Baseline score {baseline_score}")
print(f"Best performing image {top_score[0][0]} {top_score[0][1]}")