In [1]:
from modules.recognize_table import TableRecognizer
from helpers.config_helper import load_config


# 1. Load the configuration file.
#    NOTE(review): the path is relative — presumably resolved against the
#    kernel's working directory; confirm in load_config.
config_path = r'config.yaml'
config = load_config(config_path)

# 2. Build the table recognition module.
#    `tr` is reused by the labeling loop later in this notebook.
tr = TableRecognizer(config)

In [2]:
from pathlib import Path

# Root folder that is searched recursively for training images.
img_dir = r'data/train'

# rglob yields entries in filesystem-dependent order; sorting makes the
# processing order (and which file wins when two images share a stem)
# reproducible from run to run.
img_paths = sorted(Path(img_dir).rglob('*.jpg'))

In [3]:
def distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        p1: First point as an ``(x, y)`` pair.
        p2: Second point as an ``(x, y)`` pair.

    Returns:
        The straight-line distance between ``p1`` and ``p2``.
    """
    (ax, ay), (bx, by) = p1, p2
    dx = bx - ax
    dy = by - ay
    return ((dx ** 2) + (dy ** 2)) ** 0.5


def _thin_polygon_shape(line, label, min_length, axis, thickness, limit):
    """Convert one detected line into a thin 4-point polygon shape dict.

    Args:
        line: Object exposing ``x_start``, ``y_start``, ``x_end``, ``y_end``.
        label: Label stored in the shape (``'row'`` or ``'col'``).
        min_length: Lines shorter than this are discarded.
        axis: Coordinate that receives the thickness offset
            (0 = x, used for columns; 1 = y, used for rows).
        thickness: Offset added along ``axis`` to form the opposite edge.
        limit: Upper bound for the offset coordinate (image width or height).

    Returns:
        The shape dict, or ``None`` when the line is shorter than ``min_length``.
    """
    start = [line.x_start, line.y_start]
    end = [line.x_end, line.y_end]

    # Drop fragments that are too short to be real table lines.
    if distance(start, end) < min_length:
        return None

    # The opposite edge is the same segment shifted along one axis; only the
    # shifted coordinate is clamped to the image bound.
    start_shifted = list(start)
    end_shifted = list(end)
    start_shifted[axis] = min(start[axis] + thickness, limit)
    end_shifted[axis] = min(end[axis] + thickness, limit)

    return {
        'label': label,
        'points': [start, end, end_shifted, start_shifted],
        'group_id': None,
        'description': '',
        'shape_type': 'polygon',
        'flags': {},
    }


def to_coco(row_lines, col_lines, img_path, img_shape, img_data,
            min_len_ratio=0.07, thickness=2):
    """Build an annotation dict with one thin polygon per detected table line.

    NOTE(review): despite the name, the emitted layout (``version``,
    ``shapes``, ``imagePath``, ``imageData``) looks like LabelMe-style JSON
    rather than COCO — confirm with the consuming tool.

    Args:
        row_lines: Iterable of row-line objects exposing ``x_start``,
            ``y_start``, ``x_end``, ``y_end``.
        col_lines: Iterable of column-line objects with the same attributes.
        img_path: Value stored under ``imagePath``.
        img_shape: ``(height, width)`` of the image the lines refer to.
        img_data: Value stored under ``imageData``.
        min_len_ratio: Rows shorter than ``width * min_len_ratio`` and columns
            shorter than ``height * min_len_ratio`` are skipped.
        thickness: Offset used to give each line polygon a visible thickness
            (rows are extended in y, columns in x), clamped to the image size.

    Returns:
        A dict with the keys ``version``, ``flags``, ``shapes``, ``imagePath``,
        ``imageData``, ``imageHeight`` and ``imageWidth``.
    """
    h, w = img_shape

    # (label, lines, minimum length, offset axis, clamp limit) per line family.
    families = (
        ('row', row_lines, w * min_len_ratio, 1, h),
        ('col', col_lines, h * min_len_ratio, 0, w),
    )

    shapes = []
    for label, lines, min_length, axis, limit in families:
        for line in lines:
            shape = _thin_polygon_shape(line, label, min_length, axis, thickness, limit)
            if shape is not None:
                shapes.append(shape)

    return {
        'version': '5.2.0',
        'flags': {},
        'shapes': shapes,
        'imagePath': img_path,
        'imageData': img_data,
        'imageHeight': h,
        'imageWidth': w,
    }

In [4]:
import base64
import cv2


def img2base64(img):
    """Encode an image as JPEG and return the bytes as a base64 text string.

    Args:
        img: Image accepted by ``cv2.imencode``.

    Returns:
        The base64 representation of the JPEG-encoded image, as ``str``.

    Raises:
        ValueError: If ``cv2.imencode`` reports that encoding failed.
    """
    ok, buffer = cv2.imencode('.jpg', img)

    # imencode signals failure through its first return value; without this
    # check a failed encode would surface later as an unrelated error.
    if not ok:
        raise ValueError('cv2.imencode failed to encode the image as JPEG')

    return base64.b64encode(buffer).decode('utf-8')

In [6]:
import json
from utils import img_util


label_dir = r'outputs/auto_labeling/tableline/20231109'

# open(..., 'w') does not create missing parent folders, so make sure the
# output directory exists before the loop starts writing.
label_root = Path(label_dir)
label_root.mkdir(parents=True, exist_ok=True)

for img_path in img_paths:

    # 1. Load the image
    img = img_util.load_img(img_path)

    # 2. Recognize the table — start by fixing the page orientation
    img_direction_fix = tr.df.fix(img)

    # 3. Normalize the image size
    img_resize = tr.normalize_resize(img_direction_fix)

    # 4. Correct paper deformation
    img_paper_fix = tr.sf.fix(img_resize)

    # 5. Recognize the table lines
    table = tr.tlr.recognize(img_paper_fix)

    # 6. Build the annotation data.
    #    The lines were detected on `img_paper_fix` and that same image is what
    #    gets embedded as imageData, so the height/width (and the length
    #    thresholds / clamping derived from them) must come from it rather
    #    than from the raw input image.
    img_data = img2base64(img_paper_fix)
    coco = to_coco(
        table.row_lines,
        table.col_lines,
        img_path.name,
        img_paper_fix.shape[:2],
        img_data,
    )

    # 7. Save the annotation data.
    #    Files are named by image stem only, so images sharing a stem in
    #    different sub-folders write to the same JSON file.
    coco_path = label_root.joinpath(f'{img_path.stem}.json')

    with open(coco_path, 'w', encoding='utf-8') as f:
        json.dump(coco, f, ensure_ascii=False, indent=4)
