In [1]:
import sys
# Add the parent directory to the import search path — presumably so the
# `digi_leap` package imported in the next cell resolves; confirm against the
# repository layout.
sys.path.append('..')

In [2]:
import sqlite3
import warnings
from pathlib import Path
from itertools import groupby

from IPython.display import display
from ipywidgets import interact
from PIL import Image, ImageDraw

from digi_leap.pylib import db
from digi_leap.pylib import label_transforms as lt

In [3]:
# Relative paths to the label-babel-2 data directory and the SQLite database
# stored inside it.
DATA = Path('..', 'data', 'label-babel-2')
DB = DATA.joinpath('label-babel-2.sqlite')

In [4]:
# Rows from `cons` joined to `labels`, `qc_reconciled` and `sheets`.
# The `order by label_id` clause matters: groupby() below only merges adjacent
# rows that share a key. Within a label, rows come back by descending cons_run.
sql = """
    select *
    from cons
    join labels using (label_id)
    join qc_reconciled using(label_id)
    join sheets using (sheet_id)
    order by label_id, cons_run desc"""

# Materialize every record as a plain dict, then bucket consecutive rows that
# share a label_id into one list per label.
rows = list(map(dict, db.select_records(DB, sql)))
GROUPS = [
    list(members)
    for _, members in groupby(rows, key=lambda rec: rec['label_id'])
]

In [5]:
def get_label_image(record):
    """Crop a single label out of its sheet image.

    Args:
        record: Mapping with a 'path' entry (sheet image path, resolved
            relative to the parent directory) and the crop box entries
            'label_left', 'label_top', 'label_right' and 'label_bottom'.

    Returns:
        The image returned by cropping the sheet to the label's box.
    """
    path = Path('..') / record['path']
    box = (record["label_left"], record["label_top"],
           record["label_right"], record["label_bottom"])

    with warnings.catch_warnings():  # Turn off EXIF warnings
        warnings.filterwarnings("ignore", category=UserWarning)
        # Open the sheet as a context manager so its file handle is released
        # as soon as the crop exists; this function is called once per label
        # group, so handles must not pile up waiting for garbage collection.
        with Image.open(path) as sheet:
            return sheet.crop(box)

In [6]:
def transform_label(image):
    """Run the 'deskew' label transform on an image and convert it to RGB.

    Args:
        image: The label image handed to `lt.transform_label`.

    Returns:
        The transformed image after `.convert('RGB')`.
    """
    deskewed = lt.transform_label('deskew', image)
    return deskewed.convert('RGB')

In [11]:
def compare(idx):
    """Print every consensus text for one label group and show its image.

    Args:
        idx: Position of the group within GROUPS.

    The header (label_id) and the errors value are taken from the group's
    first row; each row then contributes its cons_run and cons_text. The
    transformed label image is displayed last.
    """
    members = GROUPS[idx]
    first = members[0]
    # Build the image before printing anything, so an image failure produces
    # no partial text output.
    label_image = transform_label(get_label_image(first))

    heavy_rule = '=' * 30
    light_rule = '-' * 20

    print(heavy_rule, first['label_id'], heavy_rule)
    print(first['errors'])
    print()
    for member in members:
        print(light_rule, member['cons_run'], light_rule)
        print(member['cons_text'])
    print()
    display(label_image)


# Uncomment to browse the label groups one at a time with an interactive slider:
# interact(compare, idx=(0, len(GROUPS) - 1));

In [12]:
# Relative path of the scratch directory used for exported files.
TEMP = Path('..', 'data', 'temp')

In [20]:
def output_groups():
    """Write every label group in GROUPS to TEMP as an image plus a text file.

    For each group this saves `<label_id>.jpg` (the transformed label image
    built from the group's first row) and `<label_id>.txt`, which holds the
    label_id header, the first row's errors value, and then the cons_run and
    cons_text of every row in the group.
    """
    # Neither image.save() nor open() creates missing directories, so make
    # sure the output directory exists before the first write.
    TEMP.mkdir(parents=True, exist_ok=True)

    for group in GROUPS:
        first = group[0]
        image = transform_label(get_label_image(first))

        path = TEMP / f"{first['label_id']}.jpg"
        image.save(path)

        # Explicit UTF-8: the label text may contain non-ASCII characters, and
        # the platform default encoding is not guaranteed to handle them.
        with open(path.with_suffix('.txt'), 'w', encoding='utf-8') as text:
            text.write(f"{'=' * 30} {first['label_id']} {'=' * 30}\n\n")
            text.write(f"Errors: {first['errors']}\n\n")
            for row in group:
                text.write(f"{'-' * 20} {row['cons_run']} {'-' * 20}\n")
                text.write(f"{row['cons_text']}\n\n")


# Export every label group (a .jpg image and a .txt report each) into TEMP.
output_groups()

KeyboardInterrupt: 