# Generate GeM annotation

## 1. Import the necessary packages.

In [1]:
# Computer vision
import cv2
import mahotas
import numpy as np

import imutils

# Optical character recognition
import pytesser

# Machine learning
from __future__ import print_function
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

# File handling
import codecs

# GeM generator
from generator import describe, detect_roi, generate_text, load_model

## 2. Set up the classifier

Load the pre-trained data and labels.

In [2]:
# Obtain the classifier through the project's generator.load_model() helper;
# the object is used further down via model.predict() to label each region.
model = load_model()

## 3. Prepare the document image.

In [3]:
# Path of the document image to annotate
image_path = "test_images/2005-hwy-side_b-5.jpg"

# Locate candidate regions of interest; the call yields the contours, their
# hierarchy and a file name that is reused when naming the output file.
maskcontours, maskhierarchy, filename = detect_roi(image_path)

Open the XML output file for the layout annotation and define the XML preamble.

In [4]:
# XML declaration that is written at the very top of the annotation file
preamble = '<?xml version="1.0" encoding="UTF-8"?>\n\n'

# Name the layout file after the `filename` value returned by detect_roi()
layout_file_name = "".join([str(filename), '-layout-1.xml'])

# Open for writing as UTF-8; the handle stays open until it is closed explicitly
xmlfile = codecs.open(layout_file_name, mode='w', encoding='utf-8')

Write the XML preamble to the file.

In [5]:
# Emit the XML declaration held in `preamble` as the first content of the file.
xmlfile.write(preamble)

In [6]:
# Load the page image that the regions of interest were detected on.
original = cv2.imread("test_images/2005-hwy-side_b-5.jpg")
if original is None:
    # cv2.imread() signals an unreadable/missing file by returning None
    # instead of raising, so fail here with a clear message.
    raise IOError("Could not read image: test_images/2005-hwy-side_b-5.jpg")

# Untouched copy used for feature extraction. Rectangles are drawn on
# `original` inside the loop; cropping from `original` itself would let the
# drawn outlines leak into the features of any region that overlaps an
# earlier one.
pristine = original.copy()

# Height and width of the page image in pixels
oh = original.shape[0]
ow = original.shape[1]

# One XML fragment per classified region, collected per GeM section
segmentation = []
area_model = []
realization = []

for num, mc in enumerate(maskcontours):
    (x, y, w, h) = cv2.boundingRect(mc)

    # Ignore contours taller than 90% of the page height
    if h > (0.9 * oh):
        continue

    # Classify the region from the unmodified pixels
    bounding_box = pristine[y:y+h, x:x+w]
    features = describe(bounding_box)
    prediction = model.predict(features)[0]

    if prediction == 'text':
        # Draw rectangle on original image (red in OpenCV's BGR order)
        cv2.rectangle(original, (x, y), (x + w, y + h), (0, 0, 255), 1)
        # Generate XML entries
        lu, sa, re = generate_text(x, w, y, h, num)
        # Append descriptions to list
        segmentation.append(lu)
        area_model.append(sa)
        realization.append(re)
    elif prediction == 'photo':
        # Draw rectangle on original image (green in OpenCV's BGR order)
        cv2.rectangle(original, (x, y), (x + w, y + h), (0, 255, 0), 1)
        # Identifiers in the XML are 1-based
        unit_no = num + 1
        # Describe layout unit
        visual_layout_unit = '\t\t<layout-unit id="lay-1.{0}" alt="Photo"/>\n'.format(unit_no)
        # Describe sub-area (top-left and bottom-right corners of the box)
        visual_sub_area = (
            '\t\t<sub-area id="sa-1.{0}" startx="{1}" starty="{2}" '
            'endx="{3}" endy="{4}"/>\n'
        ).format(unit_no, x, y, x + w, y + h)
        # Describe realization
        visual_realization = (
            '\t\t<realization xref="lay-1.{0}" type="photo" '
            'width="{1}px" height="{2}px"/>\n'
        ).format(unit_no, w, h)
        # Append descriptions to list
        segmentation.append(visual_layout_unit)
        area_model.append(visual_sub_area)
        realization.append(visual_realization)

## 4. Generate the GeM XML file

Generate annotation for layout layer segmentation.

In [7]:
# Opening and closing tags of the segmentation section
segmentation_preamble = '\t<segmentation>\n'
segmentation_terminate = '\t</segmentation>\n'

# Opening tag, one entry per layout unit, closing tag
xmlfile.write(segmentation_preamble)
for s in segmentation:
    # Each entry is joined into a single string before it is written
    xmlfile.write("".join(s))
xmlfile.write(segmentation_terminate)

Generate annotation for area model.

In [8]:
# Opening and closing tags of the area model section
areamodel_preamble = '\t<area-model>\n'
areamodel_terminate = '\t</area-model>\n'

# Opening tag, one entry per sub-area, closing tag
xmlfile.write(areamodel_preamble)
for a in area_model:
    # Each entry is joined into a single string before it is written
    xmlfile.write("".join(a))
xmlfile.write(areamodel_terminate)

Generate annotation for realization information.

In [9]:
# Opening and closing tags of the realization section
realization_preamble = '\t<realization>\n'
realization_terminate = '\t</realization>\n'

# Opening tag, one entry per realized layout unit, closing tag
xmlfile.write(realization_preamble)
for r in realization:
    # Each entry is joined into a single string before it is written
    xmlfile.write("".join(r))
xmlfile.write(realization_terminate)

In [10]:
# Close the handle opened with codecs.open() so the annotation file is finalized.
# NOTE(review): the visible cells write <segmentation>, <area-model> and
# <realization> as sibling top-level elements with no enclosing root element
# - confirm whether a root element is required before the file is closed.
xmlfile.close()