In [1]:
import sys

# Add the parent directory to the module search path. Presumably this is what
# lets the project-local `digi_leap` package be imported below -- confirm
# against the repository layout.
sys.path.append('..')

In [2]:
from pathlib import Path

import numpy as np
import pytesseract
from ipywidgets import interact
from PIL import Image, ImageDraw, ImageEnhance, ImageOps
from scipy import ndimage
from skimage import morphology as morph
from skimage import measure, exposure, util, io, color, filters
from skimage import transform as xform
from scipy.ndimage import interpolation as inter

from digi_leap import label_image as li
from digi_leap.label_image import NEAR_HORIZ, NEAR_VERT

In [3]:
# Root of the data tree, resolved relative to the current working directory.
DATA_DIR = Path('..', 'data')

# Directory holding the label images to work on. Swap the two assignments
# below to switch between the typewritten and handwritten sets.
LABELS_DIR = DATA_DIR.joinpath('labels', 'typewritten')
# LABELS_DIR = DATA_DIR.joinpath('labels', 'handwritten')

In [21]:
# Every JPEG directly inside LABELS_DIR, sorted by path.
LABELS = sorted(LABELS_DIR.glob('*.jpg'))

# Substring used to pick a single label image by its path. Exactly one
# assignment is active; the others are parked alternatives for quick switching.
# KEY = '4128362'
# KEY = '2995215'
# KEY = '11783735'
KEY = '10667427'

### Get the label image whose path contains the given key

In [5]:
def get_label(key):
    """Load the first label image whose path contains ``key``, as grayscale.

    Args:
        key: Substring to look for in the string form of each path in
            ``LABELS``.

    Returns:
        A PIL image converted to mode ``'L'``, or ``None`` when ``key`` is
        empty/``None`` or when no path in ``LABELS`` contains it.
    """
    # Validate the key before scanning: with a None key the substring test
    # itself raises TypeError, so the guard has to come first.
    if not key:
        return None

    # Only the first match is used, so stop at it instead of building a list.
    path = next((p for p in LABELS if key in str(p)), None)
    if path is None:
        return None

    # convert() returns a new image, so the underlying file can be closed.
    # (An skimage variant -- io.imread + color.rgb2gray -- was tried here too.)
    with Image.open(path) as image:
        return image.convert('L')

### Try segmenting by looking for "blobs" of text

In [6]:
def test2(key):
    """Segment a label into "blobs" of text and OCR each blob on its own.

    The label is gamma-adjusted and upscaled, OCR'd once as a whole, then
    binarized. Horizontal lines are removed and the remaining marks are merged
    into connected regions with morphological closing/dilation. Each region's
    bounding box is cut from the grayscale image, smoothed, rotated by the
    skew angle found for that region, displayed, and OCR'd.

    Args:
        key: Substring identifying the label image (passed to ``get_label``).

    Returns:
        None. Results are printed and displayed.
    """
    label = get_label(key)
    if label is None:
        print('No label found for key:', key)
        return

    # skimage operates on arrays and the regions below are cut out by array
    # slicing, so leave PIL here. The 2x upscale is done once, by rescale().
    gray = np.asarray(label)
    gray = exposure.adjust_gamma(gray, gamma=1.5)
    gray = xform.rescale(gray, 2.0, anti_aliasing=True)
    # gray = exposure.equalize_adapthist(gray)

    print(li.ocr_text(gray))

    # Work on a copy so the grayscale image stays available for cropping.
    binary = li.binarize(gray.copy())

    h_lines = li.find_lines(binary, NEAR_HORIZ, line_length=100, line_gap=5)
    # Return value is not used -- presumably this edits `binary` in place.
    li.remove_horiz_lines(binary, h_lines, line_width=6,
                          window=8, threshold=2.0)

    # Fuse nearby marks into blobs; drop specks before and after growing them.
    blobs = morph.binary_closing(binary, morph.disk(10))
    blobs = morph.remove_small_objects(blobs, min_size=256)
    blobs = morph.binary_dilation(blobs, morph.disk(8))
    blobs = morph.remove_small_objects(blobs, min_size=16)
    # display(li.to_pil(blobs))

    labeled, count = measure.label(blobs, return_num=True)
    print(count)

    for prop in measure.regionprops(labeled):
        r1, c1, r2, c2 = prop.bbox
        part = gray[r1:r2, c1:c2]

        # Estimate the skew on a binarized copy, then rotate the smoothed
        # grayscale crop by that angle.
        angle = li.find_skew(li.binarize(part))
        # part = exposure.adjust_gamma(part, gamma=2.0)
        part = ndimage.gaussian_filter(part, sigma=1)
        part = ndimage.rotate(part, angle, mode='nearest')

        # Experiments that are currently switched off:
        # part = xform.rotate(part, 1.0, resize=True, mode='minimum')
        # part = li.binarize(part)
        # part = util.invert(part)
        #         h_lines = part.find_horizontal_lines(line_length=100, line_gap=5)
        #         part.remove_horiz_lines(h_lines, line_width=6, window=8, threshold=2.0)
        #         part.data = morph.binary_closing(part.data, morph.disk(10))
        #         part.data = morph.remove_small_objects(part.data, min_size=8)
        #         part.data = morph.binary_dilation(part.data, morph.disk(8))

        display(li.to_pil(part))
        print(li.ocr_text(part))


# interact(test2, key=KEY)

### Try segmenting by looking for lines and phrases within lines

- Chop the label (as best we can) into lines.
- See whether Tesseract gives better results on the individual lines.

In [1]:
# Extra margin, in pixels, kept above and below a detected row when test1
# crops that row out of the label.
PADDING = 8


def test1(key):
    """Find text rows in a label and draw their bounds on the binary image.

    The label is thresholded (Sauvola), small objects are removed, and the
    result is rotated by the skew angle reported by ``li.find_skew``. Row
    bounds come from ``li.profile_projection``; each row's top is drawn in red
    and its bottom in yellow, and the annotated image is displayed.

    Args:
        key: Substring identifying the label image (passed to ``get_label``).

    Returns:
        None. The annotated image is displayed.
    """
    label = get_label(key)
    if label is None:
        print('No label found for key:', key)
        return
#     label = ImageOps.scale(label, 2.0)

    gray = np.asarray(label)
    threshold = filters.threshold_sauvola(gray, window_size=9, k=0.02)
#     threshold = filters.threshold_local(gray, 15)
#     threshold = filters.threshold_li(gray)
#     threshold = filters.threshold_yen(gray)
#     threshold = filters.threshold_otsu(gray)
    binary = gray < threshold
    binary = morph.remove_small_objects(binary, min_size=64)

    angle = li.find_skew(binary)
    binary = ndimage.rotate(binary, angle, reshape=False, order=0)

    # Rotate the grayscale image by the same angle so that row crops line up
    # with the row bounds, which are measured on the rotated binary image.
    deskewed = ndimage.rotate(
        gray, angle, reshape=False, order=0, mode='nearest')
    deskewed = Image.fromarray(deskewed)
#     text = li.ocr_text(deskewed)
#     print(text)

    rows = li.profile_projection(binary)
#     rows = li.merge_rows(rows, inside_row=8)

    image = li.to_pil(binary).convert('RGB')
    draw = ImageDraw.Draw(image)

    for top, bot in rows:
        draw.line((0, top, image.width, top), width=1, fill='red')
        draw.line((0, bot, image.width, bot), width=1, fill='yellow')

        # Keep the padded box inside the image; PIL fills any part of a crop
        # that lies outside the image with black.
        upper = max(0, top - PADDING)
        lower = min(deskewed.height, bot + PADDING)
        # `row` is only consumed by the per-row experiments below, which are
        # currently switched off.
        row = deskewed.crop((0, upper, deskewed.width, lower))
#         row = li.binarize(row, window_size=9, k=0.02)
#         row = morph.remove_small_objects(row, min_size=64)
#         h_lines = li.find_lines(row, NEAR_HORIZ, line_length=100, line_gap=5)
#         li.remove_horiz_lines(row, h_lines, line_width=6, window=10, threshold=2)
#         row = li.to_pil(row)
#         cont = ImageEnhance.Contrast(row)
#         row = cont.enhance(2.0)
#         brite = ImageEnhance.Contrast(row)
#         row = brite.enhance(2.0)
#         sharp = ImageEnhance.Sharpness(row)
#         row  = sharp.enhance(2.0)
#         display(row)
#         text = li.ocr_text(row)
#         print(text)

    display(image)


# interact(test1, key=KEY)