In [9]:
import csv
import json
from itertools import cycle
from pathlib import Path

import easyocr
import pytesseract
from IPython.display import display
from ipywidgets import interact
from PIL import Image

from digi_leap.const import TESS_CONFIG, CHAR_BLACKLIST
import digi_leap.label_transforms as lt
from digi_leap.ocr import ocr_label

In [2]:
# Data root, resolved against the directory the notebook is launched from.
DATA_DIR = Path.cwd().joinpath('data')
# Label images live under data/labels-1; the 'typewritten' sub-directory
# is the one the cells below read from.
LABELS_1 = DATA_DIR.joinpath('labels-1')
TYPEWRITTEN = LABELS_1.joinpath('typewritten')

In [3]:
# Build the EasyOCR reader once, with the language list ['en'], so that the
# cells below (see xform) can reuse the same reader instead of recreating it.
EASY_OCR = easyocr.Reader(['en'])

In [4]:
# Every *.jpg directly inside TYPEWRITTEN (the glob is not recursive).
# Sorting gives a deterministic order for the integer indexes that the
# cells below use to pick a label.
LABELS = sorted(TYPEWRITTEN.glob('*.jpg'))

In [5]:
# Label image transforms from digi_leap.label_transforms, keyed by a short
# name. Each value is a transform instance created once here and later called
# as XFORM[name](image) inside xform().
# Not every entry is applied by xform() as written: 'blur' and 'exposure' are
# commented out there, and 'rank_modal' is not referenced in the visible cells.
# NOTE(review): the meaning/units of area_threshold=24 and max_iter=2 are
# defined by the transform classes -- confirm against digi_leap.label_transforms.
XFORM = {
    'scale': lt.Scale(),
    'rotate': lt.Rotate(),
    'deskew': lt.Deskew(),
    'rank_modal': lt.RankModal(),
    'rank_mean': lt.RankMean(),
    'blur': lt.Blur(),
    'exposure': lt.Exposure(),
    'binarize': lt.Binarize(),
    'remove_small_holes': lt.BinaryRemoveSmallHoles(area_threshold=24),
    'binary_opening': lt.BinaryOpening(),
    'thin': lt.BinaryThin(max_iter=2),
}

In [None]:
def xform(idx):
    """Preprocess one label, OCR it with two engines, and show the images.

    The label at ``LABELS[idx]`` is opened, converted to an array, passed
    through a fixed sequence of ``XFORM`` transforms, and converted back to a
    PIL image. The Tesseract text and the EasyOCR result for the processed
    image are printed, then the processed and original images are displayed.

    Args:
        idx: Integer index into ``LABELS``.
    """
    # Transforms are applied in exactly this order. The 'blur' step (tried
    # with sigma=1) and the 'exposure' step are currently disabled.
    pipeline = (
        'scale',
        'rotate',
        'deskew',
        'rank_mean',
        'binarize',
        'remove_small_holes',
        'thin',
        'binary_opening',
    )

    original = Image.open(LABELS[idx])

    image = lt.LabelTransform.as_array(original)
    for step in pipeline:
        image = XFORM[step](image)
    image = lt.LabelTransform.to_pil(image)

    print(pytesseract.image_to_string(image, config=TESS_CONFIG))
    # NOTE(review): EasyOCR documents readtext() input as a file path, numpy
    # array, or bytes -- confirm it accepts the object returned by to_pil().
    print(EASY_OCR.readtext(image, blocklist=CHAR_BLACKLIST))
    display(image)
    display(original)


# A (min, max) tuple becomes an integer slider whose upper bound is inclusive,
# so the range must stop at the last valid index. Using len(LABELS) as the
# maximum let the slider reach an index one past the end of LABELS, which
# raises IndexError inside xform().
interact(xform, idx=(0, len(LABELS) - 1))
# xform(46)
# xform(2497)

interactive(children=(IntSlider(value=2496, description='idx', max=4992), Output()), _dom_classes=('widget-int…

In [7]:
def get_text(idx):
    """Run ``ocr_label`` on one label and show its text and images.

    Opens ``LABELS[idx]``, passes it to ``ocr_label``, prints the Tesseract
    text of the resulting ``score.image``, and then displays that image
    followed by the original.

    Args:
        idx: Integer index into ``LABELS``.
    """
    original = Image.open(LABELS[idx])
    score = ocr_label(original)
    processed = score.image

    print(pytesseract.image_to_string(processed))
    # display() renders its arguments in order: processed first, then original.
    display(processed, original)


# interact(get_text, idx=(0, len(LABELS) - 1))
# get_text(2785)
# get_text(46)

In [8]:
def show_label(idx):
    """Print the path of one label and display its unmodified image.

    Args:
        idx: Integer index into ``LABELS``.
    """
    label_path = LABELS[idx]
    # Open before printing so a bad file fails before any output is produced.
    picture = Image.open(label_path)

    print(label_path)
    display(picture)


# interact(show_label, idx=(0, len(LABELS) - 1))