# Generate GeM annotation

## 1. Import the necessary packages.

In [1]:
# Computer vision
import cv2
import mahotas
import numpy as np

import imutils

# Optical character recognition
import pytesser

# Machine learning
from __future__ import print_function
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

# Logging
import logging
import warnings
from logging import FileHandler
from vlogging import VisualRecord

# Connected-component analysis
from skimage.filters import threshold_adaptive
from skimage import measure

# File handling
import codecs
import pickle

# GeM generator
from generator import describe, detect_roi

## 2. Set up the classifier

Load the pickled data and labels that are used to train the classifier.

In [2]:
# Load the pickled data and labels.
# Pickle streams are binary, so the files are opened in 'rb' mode, and the
# context managers make sure each handle is closed once loading finishes.
# NOTE: pickle.load can execute arbitrary code; only load files from a
# trusted source.
datafile = "data.db"
with open(datafile, 'rb') as td_file:
    data = pickle.load(td_file)

labelfile = "labels.db"
with open(labelfile, 'rb') as ld_file:
    labels = pickle.load(ld_file)

Split the data into training (75%) and testing (25%) sets.

In [3]:
# Hold out a quarter of the samples for testing. The fixed random_state
# seeds the shuffling so the same split is produced on every run.
trainData, testData, trainLabels, testLabels = train_test_split(
    np.array(data),
    np.array(labels),
    test_size=0.25,
    random_state=42,
)

Set up a random forest classifier.

In [4]:
# Random forest with 20 trees; the seed is fixed at 42.
model = RandomForestClassifier(
    n_estimators=20,
    random_state=42,
)

Train the classifier.

In [5]:
# Fit the random forest on the training split. Left as a bare expression so
# the notebook displays the fitted estimator's repr as the cell output.
model.fit(trainData, trainLabels)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
            oob_score=False, random_state=42, verbose=0, warm_start=False)

## 3. Prepare the document image.

In [6]:
# Run region-of-interest detection on the test image. The call returns a
# pair; the first element is iterated later and each item is passed to
# cv2.boundingRect, so it is presumably a list of OpenCV contours.
maskcontours, maskhierarchy = detect_roi(
    "test_images/2005-hwy-side_b-5.jpg"
)

Open the output XML file for writing and define the XML declaration.

In [None]:
# XML declaration that goes at the very top of the output file.
preamble = '<?xml version="1.0" encoding="UTF-8"?>\n\n'

# NOTE(review): `filename` is not assigned in any cell visible here; confirm
# it is defined before this cell runs.
layout_file_name = str(filename) + '-layout-1.xml'

# The handle stays open across the following cells and is closed in the
# final cell of the notebook.
xmlfile = codecs.open(layout_file_name, mode='w', encoding='utf-8')

Write the XML declaration to the file.

In [None]:
# Emit the XML declaration as the first content of the output file.
xmlfile.write(preamble)

In [None]:
# Classify every detected region as text or photo, draw its bounding box on
# a copy of the image and collect the matching GeM XML fragments.
#
# NOTE(review): `image` is not assigned in any cell visible here; confirm it
# is loaded before this cell runs.
original = image.copy()

# Image dimensions; the height is used below to discard page-sized regions.
oh = original.shape[0]
ow = original.shape[1]

# XML fragments, one list per GeM section; written out by the cells below.
segmentation = []
area_model = []
realization = []

for num, mc in enumerate(maskcontours):
    (x, y, w, h) = cv2.boundingRect(mc)

    # Skip regions taller than 90% of the image height.
    if h > (0.9 * oh):
        continue

    # Crop from the untouched `image`, not from `original`: rectangles drawn
    # for earlier regions would otherwise end up inside later crops and
    # leak into their features.
    bounding_box = image[y:y+h, x:x+w]
    features = describe(bounding_box)
    # NOTE(review): assumes describe() returns the features in a shape that
    # model.predict accepts for a single sample -- confirm.
    prediction = model.predict(features)[0]

    # Identifiers are 1-based and shared by the layout unit, the sub-area
    # and the realization that describe the same region.
    unit_id = str(num + 1)

    if prediction == 'text':
        colour = (0, 0, 255)
        layout_unit = '\t\t<layout-unit id="lay-1.' + unit_id + '"/>\n'
        unit_realization = ('\t\t<realization xref="lay-1.' + unit_id +
                            '" type="text"/>\n')
    elif prediction == 'photo':
        colour = (0, 255, 0)
        layout_unit = ('\t\t<layout-unit id="lay-1.' + unit_id +
                       '" alt="Photo"/>\n')
        unit_realization = ('\t\t<realization xref="lay-1.' + unit_id +
                            '" type="photo" width="' + str(w) +
                            'px" height="' + str(h) + 'px"/>\n')
    else:
        # Any other predicted label produces no annotation.
        continue

    # Draw rectangle on the annotated copy.
    cv2.rectangle(original, (x, y), (x + w, y + h), colour, 1)

    # The sub-area description is identical for both region types.
    sub_area = ('\t\t<sub-area id="sa-1.' + unit_id + '" ' +
                'startx="' + str(x) + '" ' +
                'starty="' + str(y) + '" ' +
                'endx="' + str(x + w) + '" ' +
                'endy="' + str(y + h) + '"' + '/>\n')

    # Append descriptions to the per-section lists.
    segmentation.append(layout_unit)
    area_model.append(sub_area)
    realization.append(unit_realization)

# NOTE(review): `vlog` is neither defined nor imported in the cells visible
# here (only describe and detect_roi are imported from generator) -- confirm.
vlog(original, "RESULT 1: Contours detected in the contour mask")

## 4. Generate the GeM XML file

Generate annotation for layout layer segmentation.

In [None]:
# Opening and closing tags of the segmentation section.
segmentation_preamble = '\t<segmentation>\n'
segmentation_terminate = '\t</segmentation>\n'

# Every piece is already a complete string, so it is written directly;
# writelines emits the collected layout-unit entries in list order.
xmlfile.write(segmentation_preamble)
xmlfile.writelines(segmentation)
xmlfile.write(segmentation_terminate)

Generate annotation for area model.

In [None]:
# Opening and closing tags of the area-model section.
areamodel_preamble = '\t<area-model>\n'
areamodel_terminate = '\t</area-model>\n'

# Every piece is already a complete string, so it is written directly;
# writelines emits the collected sub-area entries in list order.
xmlfile.write(areamodel_preamble)
xmlfile.writelines(area_model)
xmlfile.write(areamodel_terminate)

Generate annotation for realization information.

In [None]:
# Opening and closing tags of the realization section.
realization_preamble = '\t<realization>\n'
realization_terminate = '\t</realization>\n'

# Every piece is already a complete string, so it is written directly;
# writelines emits the collected realization entries in list order.
xmlfile.write(realization_preamble)
xmlfile.writelines(realization)
xmlfile.write(realization_terminate)

In [None]:
# Close the output file so that everything written so far is flushed to disk.
xmlfile.close()