# Image processing

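Process images of text scans: highlight text-line polygons on the scanned pages so that their coordinates can be checked visually.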

In [None]:
import numpy
import os
import regex
import sys
from PIL import Image, ImageDraw
import xml.etree.ElementTree as ET
sys.path.append(os.getcwd() + '/..')
from scripts import read_transkribus_files
from IPython.display import clear_output

In [None]:
# Root directory of the corpus; image paths are built relative to it.
CORPUS_DIR = "../../data/Overlijden/"
# Directory whose files are listed and parsed as XML to obtain the polygons.
COORDINATES_DIR = CORPUS_DIR + "x-samples/three-columns-100/corrected/"

# Alpha value written inside a marked polygon (255 is fully opaque in RGBA).
TRANSPARENT_BACKGROUND = 255
# Default alpha value for everything outside the polygon (128 is half transparent).
COVERED_BACKGROUND = 128

In [None]:
def polygon2rectangle(coordinates):
    """Return the axis-aligned bounding box of a polygon.

    Args:
        coordinates: iterable of (x, y) pairs.

    Returns:
        A tuple (x_min, y_min, x_max, y_max).

    The extremes are computed with min()/max() rather than by comparing
    against fixed start values, so negative coordinates and coordinates
    above 1000000 are handled correctly.
    """
    points = list(coordinates)
    if not points:
        # Keep the result earlier versions returned for an empty polygon
        # (an inverted box) so existing callers see no change.
        return 1000000, 1000000, 0, 0
    xs = [x for x, _ in points]
    ys = [y for _, y in points]
    return min(xs), min(ys), max(xs), max(ys)

In [None]:
# code based on https://stackoverflow.com/questions/22588074/polygon-crop-clip-using-python-pil

def mark_polygon(image, polygon, covered_background=COVERED_BACKGROUND):
    """Return an RGBA copy of image whose alpha channel highlights polygon.

    Args:
        image: PIL image; it is converted to RGBA before processing.
        polygon: point sequence passed to ImageDraw's polygon().
        covered_background: alpha value used outside the polygon.

    Returns:
        A new RGBA PIL image with the colour channels of the input and an
        alpha channel equal to TRANSPARENT_BACKGROUND inside the polygon,
        0 on its outline and covered_background everywhere else.
    """
    rgba_pixels = numpy.asarray(image.convert("RGBA"))
    height, width = rgba_pixels.shape[0], rgba_pixels.shape[1]

    # Draw the polygon on a single-channel canvas that becomes the alpha layer.
    alpha_canvas = Image.new('P', (width, height), covered_background)
    pen = ImageDraw.Draw(alpha_canvas)
    pen.polygon(polygon, outline=0, fill=TRANSPARENT_BACKGROUND)

    # Combine the original colours with the freshly drawn alpha layer.
    result_pixels = numpy.empty(rgba_pixels.shape, dtype='uint8')
    result_pixels[:, :, 3] = numpy.array(alpha_canvas)
    result_pixels[:, :, :3] = rgba_pixels[:, :, :3]
    return Image.fromarray(result_pixels, "RGBA")

In [None]:
def make_image_file_name(coordinates_file_name):
    """Build the path of the image that belongs to a coordinates file.

    The file name is split on whitespace. The first two tokens form the
    first sub-directory; all tokens except the last, with one trailing "."
    removed, form the second sub-directory. The image file carries the same
    name with a trailing "xml" replaced by "JPG".

    Args:
        coordinates_file_name: bare file name (no directory) of the XML file.

    Returns:
        The image path below CORPUS_DIR.
    """
    file_name_parts = coordinates_file_name.split()
    year_dir = " ".join(file_name_parts[:2])
    # Raw string: "\." in a normal string literal is an invalid escape sequence.
    district_dir = regex.sub(r"\.$", "", " ".join(file_name_parts[:-1]))
    image_name = regex.sub(r"xml$", "JPG", coordinates_file_name)
    return os.path.join(CORPUS_DIR, year_dir, district_dir, image_name)

In [None]:
def get_coordinates_from_line(line):
    """Parse a string of whitespace-separated "x,y" pairs.

    Args:
        line: text such as "10,20 30,40".

    Returns:
        A list of (x, y) tuples of ints, in input order.

    Raises:
        ValueError: if a pair does not consist of exactly two integers.
    """
    points = []
    for token in line.split():
        x_text, y_text = token.split(",")
        points.append((int(x_text), int(y_text)))
    return points

In [None]:
def encloses_point(rectangle, point):
    """Tell whether point lies inside rectangle, borders included.

    Args:
        rectangle: sequence (x_min, y_min, x_max, y_max).
        point: sequence (x, y).

    Returns:
        True if the point is within or on the edge of the rectangle.
    """
    if not (rectangle[0] <= point[0] <= rectangle[2]):
        return False
    return rectangle[1] <= point[1] <= rectangle[3]

In [None]:
# Interactive spot check: for each of the first ten coordinate files, find the
# first text line whose bounding box contains PROBE_POINT, show it and ask the
# user whether the marked region is correct.

# Pixel position (x, y) that a text line's bounding box must contain.
PROBE_POINT = (600, 460)

# Collected (image file name, text line id, user answer) tuples.
results = []
for coordinates_file_name in sorted(os.listdir(COORDINATES_DIR))[:10]:
    image_file_name = make_image_file_name(coordinates_file_name)
    # The context manager closes the image file once this scan is handled.
    with Image.open(image_file_name) as image:
        coordinates_file_name = os.path.join(COORDINATES_DIR, coordinates_file_name)
        tree = ET.parse(coordinates_file_name)
        root = tree.getroot()
        # The returned values are not used in this loop.
        text, meta_data = read_transkribus_files.get_text_from_xml(root)
        match_found = False
        for text_line in root.findall(".//{*}TextLine"):
            text_line_id = text_line.attrib["id"]
            for coords in text_line.findall("./{*}Coords"):
                polygon = get_coordinates_from_line(coords.attrib["points"])
                rectangle = polygon2rectangle(polygon)
                if encloses_point(rectangle, PROBE_POINT):
                    marked_image = mark_polygon(image, polygon)
                    # display is not imported here; presumably the built-in
                    # that IPython/Jupyter provides in notebook cells.
                    display(marked_image.crop(rectangle))
                    evaluation = input("is this correct? ")
                    results.append((image_file_name, text_line_id, evaluation))
                    match_found = True
                    clear_output(wait=True)
            # Only the first matching text line of each file is evaluated.
            if match_found:
                break

In [None]:
# Show the (image file name, text line id, answer) tuples collected so far.
results

In [None]:
# This works only with "jupyter notebook", not with "jupyter lab".

%matplotlib notebook
import matplotlib.pyplot as plt

In [None]:
def onclick(event):
    """Handle a mouse click: remember its data coordinates and show them.

    Stores event.xdata and event.ydata in the globals ix and iy and writes
    both values into the global text object.
    """
    global ix, iy
    clicked_at = (event.xdata, event.ydata)
    ix, iy = clicked_at
    label = f"{ix} {iy}"
    text.set_text(label)

In [None]:
def show_scan():
    """Show the global marked_image in a new figure that reports clicks.

    Creates the global text label (initially "filler") that onclick updates
    and registers onclick for mouse button presses on the figure canvas.
    """
    global text
    figure = plt.figure()
    plt.imshow(marked_image)
    # Placeholder label at the figure origin; onclick overwrites its content.
    text = figure.text(0, 0, "filler")
    canvas = figure.canvas
    canvas.mpl_connect('button_press_event', onclick)
    plt.plot()

In [None]:
show_scan()