In [4]:
from pathlib import Path
DATA_DIRNAME = Path('/Users/sergeyk/Downloads/iamdb')

In [90]:
import xml.etree.ElementTree as ElementTree

DOWNSAMPLE_FACTOR = 2

def get_line_region(line, downsample_factor=DOWNSAMPLE_FACTOR):
    """
    line (xml element): has x, y, width, and height attributes
    downsample_factor (int): if images were downsampled, the regions must be, too
    """
    x1s = [int(el.attrib['x']) for el in line.findall('word/cmp')]
    y1s = [int(el.attrib['y']) for el in line.findall('word/cmp')]
    x2s = [int(el.attrib['x']) + int(el.attrib['width']) for el in line.findall('word/cmp')]
    y2s = [int(el.attrib['y']) + int(el.attrib['height']) for el in line.findall('word/cmp')]
    x1, y1, x2, y2 = min(x1s), min(y1s), max(x2s), max(y2s)
    return {
        'x1': x1 // DOWNSAMPLE_FACTOR,
        'y1': y1 // DOWNSAMPLE_FACTOR,
        'x2': x2 // DOWNSAMPLE_FACTOR,
        'y2': y2 // DOWNSAMPLE_FACTOR
    }

def get_regions_from_xml(filename):
    root = ElementTree.parse(filename).getroot()
    lines = root.findall('handwritten-part/line')
    regions = list(map(get_line_region, lines))
    return regions

filenames = (DATA_DIRNAME / 'xml').glob('*.xml')
regions_by_name = {
    filename.stem: get_regions_from_xml(filename)
    for filename in filenames
}

In [82]:
max_num_lines_by_name = max(len(v) for v in regions_by_name.values())
max_num_lines_by_name

13

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline

In [162]:
# Crop and further resize images

BUFFER = 50
DOWNSAMPLE_FACTOR = 4

filenames = list((DATA_DIRNAME / 'forms-resized').glob('*.jpg'))
heights = []
for region in regions_by_name.values():
    min_y1 = min(r['y1'] // DOWNSAMPLE_FACTOR for r in regions) - BUFFER
    max_y2 = max(r['y2'] // DOWNSAMPLE_FACTOR for r in regions) + BUFFER
    height = max_y2 - min_y1
    heights.append(height)
    
max_width = 1240 // DOWNSAMPLE_FACTOR
max_height = max(heights)
print(f'Max dims are {max_width} width and {max_height} height')

Max dims are 310 width and 392 height


In [164]:
CROP_OUTPUT_DIRNAME = Path('/Users/sergeyk/Downloads/iamdb/forms-cropped')
CROP_OUTPUT_DIRNAME.mkdir(exist_ok=True)
GT_OUTPUT_DIRNAME = Path('/Users/sergeyk/Downloads/iamdb/forms-cropped-gt')
GT_OUTPUT_DIRNAME.mkdir(exist_ok=True)
DEBUG_OUTPUT_DIRNAME = Path('/Users/sergeyk/Downloads/iamdb/forms-cropped-debug')
DEBUG_OUTPUT_DIRNAME.mkdir(exist_ok=True)

def write_cropped_and_gt_image(name, regions_by_name, max_height, downsample_factor):
    form_image = cv2.imread(str(DATA_DIRNAME / 'forms-resized' / f'{name}.jpg'), cv2.IMREAD_GRAYSCALE)
    form_image = cv2.resize(form_image, (0,0), fx=1/downsample_factor, fy=1/downsample_factor) 
    regions = regions_by_name[name]
    
    min_y1 = min(r['y1'] // downsample_factor for r in regions) - BUFFER
    max_y2 = max(r['y2'] // downsample_factor for r in regions) + BUFFER
    
    # Crop image
    form_image_crop = 255 * np.ones((max_height, max_width), dtype=np.uint8)
    try:
        form_image_crop[:(max_y2 - min_y1)] = form_image[min_y1:max_y2]
    except:
        print('Rescued')
        form_image_crop[:(max_y2 - min_y1)] = form_image[min_y1:max_y2 - 1]
    cv2.imwrite(str(CROP_OUTPUT_DIRNAME / f'{name}.jpg'), form_image_crop)
    
    # GT image
    gt_image = np.zeros_like(form_image_crop, dtype=np.uint8)
    for ind, region in enumerate(regions):
        gt_image[
            (region['y1'] // downsample_factor - min_y1):(region['y2'] // downsample_factor - min_y1),
            region['x1'] // downsample_factor:region['x2'] // downsample_factor
        ] = ind + 1
    cv2.imwrite(str(GT_OUTPUT_DIRNAME / f'{name}.png'), gt_image)
    
    # Debug image
    cmap = plt.get_cmap('Set1')
    form_image_crop = np.dstack([form_image_crop, form_image_crop, form_image_crop])
    for ind, region in enumerate(regions):
        color = [255 * _ for _ in cmap(ind)[:-1]]
        cv2.rectangle(
            form_image_crop,
            (region['x1'] // downsample_factor, region['y1'] // downsample_factor - min_y1),
            (region['x2'] // downsample_factor, region['y2'] // downsample_factor - min_y1),
            color,
            3
        )
    cv2.imwrite(str(DEBUG_OUTPUT_DIRNAME / f'{name}.png'), form_image_crop)

for filename in filenames:
    write_cropped_and_gt_image(filename.stem, regions_by_name, max_height, DOWNSAMPLE_FACTOR)

Rescued


ValueError: could not broadcast input array from shape (246,309) into shape (247,310)