In [2]:
import tensorflow as tf
import numpy as np
import xml.etree.ElementTree as ET

In [None]:
def get_table_bbox(table_xml_path, new_image_shape):
    """
    - Extract Table Coordinates from xml
    - Scale them w.r.t to new image shape

    The root element's "CropBox" attribute and each table's "BBox" attribute
    hold four whitespace-separated tokens; every token is the hex encoding of
    a big-endian 8-byte double (struct format '!d').

    Input:
        table_xml_path: str - xml path
        new_image_shape: tuple - (new_h, new_w)

    Output:
        table_bboxes: List - one [x0, y0, x1, y1] list of rounded ints per
            Composite element whose Label is 'TableBody' (empty if none)
    """
    # Imported here because the notebook's import cell does not provide
    # `struct`; without it the first call fails with NameError.
    import struct

    def _hex_to_double(token):
        # Decode one hex token into a Python float.
        return struct.unpack('!d', bytes.fromhex(token))[0]

    root = ET.parse(table_xml_path).getroot()

    # CropBox token order is: x0, y1, x1, y0.
    px0, py1, px1, py0 = [_hex_to_double(tok) for tok in root.get("CropBox").split()]
    pw = abs(px1 - px0)
    ph = abs(py1 - py0)

    new_h, new_w = new_image_shape[0], new_image_shape[1]
    table_bboxes = []

    for table in root.findall(".//Composite[@Label='TableBody']"):
        x0p, y0m, x1p, y1m = [_hex_to_double(tok) for tok in table.get("BBox").split()]

        # x is measured from the crop box's px0 edge.
        x0 = round(new_w * (x0p - px0) / pw)
        x1 = round(new_w * (x1p - px0) / pw)
        # y is measured from the crop box's py1 edge, i.e. the vertical axis
        # is flipped (presumably page coordinates grow upwards while image
        # rows grow downwards - confirm against the dataset spec).
        y0 = round(new_h * (py1 - y0m) / ph)
        y1 = round(new_h * (py1 - y1m) / ph)

        table_bboxes.append([x0, y0, x1, y1])
    return table_bboxes

In [None]:
def get_col_bbox(column_xml_path, prev_img_shape, new_image_shape, table_bboxes, thresh=3):
    """
    - Extract Column Coordinates from xml
    - Scale them from the previous image shape to the new image shape
    - If there are no table_bboxes present, approximate one using the column bboxes

    Input:
        column_xml_path: str - xml path; coordinates are read from the
            ./object/bndbox/{xmin,ymin,xmax,ymax} elements under the root
        prev_img_shape: tuple - (prev_h, prev_w) the xml coordinates refer to
        new_image_shape: tuple - (new_h, new_w) to scale the coordinates to
        table_bboxes: List - list of table bbox coordinates
        thresh: int - margin added on every side of the union of the column
            boxes when a table bbox has to be approximated (default 3)

    Output:
        col_bbox: List - one [x_min, y_min, x_max, y_max] per column
        table_bboxes: List - the list passed in, or a single approximated box
            when that list was empty and at least one column was found
    """
    root = ET.parse(column_xml_path).getroot()

    def _scaled(tag, axis):
        # axis 1 scales by width (x values), axis 0 scales by height (y values).
        return [
            round(int(node.text) * new_image_shape[axis] / prev_img_shape[axis])
            for node in root.findall("./object/bndbox/" + tag)
        ]

    col_bbox = [
        list(box)
        for box in zip(_scaled("xmin", 1), _scaled("ymin", 0),
                       _scaled("xmax", 1), _scaled("ymax", 0))
    ]

    # Fallback: no table coordinates but column coordinates are available.
    # Guarded on col_bbox so that an xml without columns returns empty lists
    # instead of failing on min()/max() of an empty sequence.
    if len(table_bboxes) == 0 and col_bbox:
        table_bboxes = [[
            min(box[0] for box in col_bbox) - thresh,
            min(box[1] for box in col_bbox) - thresh,
            max(box[2] for box in col_bbox) + thresh,
            max(box[3] for box in col_bbox) + thresh,
        ]]

    return col_bbox, table_bboxes

# Useful blog posts

https://medium.com/analytics-vidhya/table-extraction-using-deep-learning-3c91790aa200

https://asagar60.medium.com/tablenet-deep-learning-model-for-end-to-end-table-detection-and-tabular-data-extraction-from-b1547799fe29
