In [1]:
from IPython.core.display import display, HTML
# inject a CSS override so the notebook's .container element spans 100% of the page width
display(HTML("<style>.container { width:100% !important; }</style>"))
from IPython.display import Image 
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
import config
from copy import deepcopy
import numpy as np
from PIL import Image as pil_im
from PIL import ImageEnhance
from imageio import imwrite
from fastai.core import Path
from fastai.vision import load_learner, open_image
from fastai.vision import Image as fast_im
from time import time
import yaml

In [3]:
from image_graph import *
from line_bounds import *
from pred_handler import get_top_preds

<p style="font-size:16px;color:#FFB5A4">After all of the lines are split and stored in image files, we can split each one into its individual characters <span style="font-size:13px;">(or sometimes character parts, or multiple characters -- see dataset generation notebook)</span></p>

In [4]:
# directory containing the line images produced by the line-splitting step
line_source = Path('../greek_pages/line_images/')

# Both learners are loaded from ../models/. The classifier is stored on the shared `config`
# module rather than in a notebook variable (presumably so the helper modules, e.g.
# pred_handler, can reach it — TODO confirm).
config.model = load_learner('../models/', 'rn_34.pkl') # model to classify a character component
splitter_model = load_learner('../models/', 'split_model.pkl') # model to split components containing multiple characters

<p style="font-size:15px;color:#FFB5A4">For this notebook we will parse just one line; in general, each line will be parsed with the following methodology and the results compiled together into a final text document.</p>

In [5]:
# Select the example line by name instead of by position: directory listings come back in
# arbitrary order, so line_source.ls()[1] is not guaranteed to be line_1.jpg, which is the
# image this notebook displays next to the generated text.
line_path = line_source/'line_1.jpg'
line_path

PosixPath('../greek_pages/line_images/line_1.jpg')

In [6]:
# directory and file name handed to get_graph for this line's graph file
graph_path = Path('../greek_pages/line_graphs/')
# Same base name as the image, with a .txt extension. with_suffix swaps only the real file
# extension (whatever it is) instead of replacing every '.jpg' substring in the name.
graph_name = line_path.with_suffix('.txt').name

In [7]:
# load the line image as an array (get_image_array comes from the star-imported helper modules)
img = get_image_array(line_path)
# the two-value unpacking below requires img to be 2-D, i.e. shape == (rows, cols)
config.rows, config.cols = img.shape # set config parameters used by the graph processing algorithms

<p style="font-size:16px;color:#FFB5A4">Get a graph representation of the line as well as all the connected components - these represent the individual characters</p>

In [8]:
G = get_graph(graph_path, graph_name, line_path)
# c maps a component id to its bounds, unpacked later as (lb, ub, lbr, ubr)
c = get_components(G)
# Get the inverse letter map used to convert a letter code into its character.
# yaml.safe_load is used because a bare yaml.load(f) without a Loader is deprecated and raises
# a TypeError on PyYAML 6+; safe_load also refuses to construct arbitrary Python objects.
# The file is opened explicitly as UTF-8 so non-ASCII characters in the map decode the same
# way on every platform.
with open('letter_map_inverse.yaml', encoding='utf-8') as f:
    lm_inv = yaml.safe_load(f)

<p style="font-size:15px;color:#FFB5A4">For each component, predict the letter with the classifier (see training details for info on model)
    <br/><br/>
    If the top prediction has a probability above .8, append only that letter to the list of characters; otherwise keep the top two predictions.</p> 

In [9]:
# Build one (left bound, right bound, candidate letters, component id) tuple per component.
s = []
for comp_id in c:
    lb, ub, lbr, ubr = c[comp_id]
    # the classifier reads its input from disk, so the crop is written to a scratch file first
    imwrite('temp.jpg', img[lbr:ubr, lb:ub])
    preds = get_top_preds('temp.jpg', 2)
    # always keep the best guess; keep the runner-up too unless the best one is confident (> .8)
    candidates = [preds[0][0]]
    if not preds[0][1] > .8:
        candidates.append(preds[1][0])
    s.append((lb, ub, candidates, comp_id))

<p style="font-size:15px;color:#FFB5A4">Quick look at the predictions for the first few characters</p> 

In [10]:
# order the entries by their right pixel boundary (x[1]) and show the first five
sorted(s, key=lambda x: x[1])[:5]

[(147, 176, ['breathlessaccute'], 13611),
 (145, 180, ['α'], 75532),
 (180, 212, ['χ'], 78253),
 (212, 238, ['θ'], 40601),
 (239, 303, ['multi'], 75627)]

<p style="font-size:15px;color:#FFB5A4">Now we will create a first attempt at a text version of the line</p> 

<p style="font-size:15px;color:#FFB5A4">For each character, if it is a multi we will split it (see below) and then add each sub-image. If not, we just add the letter, using its prediction, its left boundary (pixel number) and the right boundary of the character before it. See the function definitions below.</p>

In [13]:
# NOTE: run the function-definition cell further down first
# (get_mult, get_best_letters and add_letter are defined there)
line = ''
srted = sorted(s, key=lambda x: x[1])
for i, ltr in enumerate(srted):
    left_px, letters, comp_id = ltr[0], ltr[2], ltr[-1]
    # right edge of the previous entry; the very first entry is compared against itself
    prev_right_px = srted[max(i - 1, 0)][1]
    if 'multi' in letters:
        # re-crop the component and split it into two sub-characters
        lb, ub, lbr, ubr = c[comp_id]
        imwrite('temp.jpg', img[lbr:ubr, lb:ub])
        first, second = get_best_letters(*get_mult(Path('temp.jpg')))
        line += add_letter(first, prev_right_px, left_px)
        # identical bounds give a gap of zero, so the second half never gets a leading space
        line += add_letter(second, left_px, left_px)
    elif letters != ['noise']:
        line += add_letter(letters, prev_right_px, left_px)

<p style="font-size:15px;color:#FFB5A4">Here is the image of the line, as well as the text generated by the current model and parser. Currently I keep both options for predictions with probability below .8, separating them with a / character. In a final conversion I would return the argmax. However, a future version will use a language model to choose between the top 2 or 3 predictions, multiplying the letter probability by the probability of the word generated with that letter. In practice this will rarely be an issue, since usually only one of the options will form an actual word.</p> <br/>
<p style="font-size:15px;color:#FFB5A4">As of now the model has primarily been trained on Greek data with only a few English samples, so as I expand that data set the performance on English letters will improve dramatically.</p>

<img src="../greek_pages/line_images/line_1.jpg" style="width: 700px; height=150px;" />

In [14]:
# display the text assembled for this line
line

'῎αχθο/υματ/t ᾽αχθ´εσομαι, ῀/.-, -./,, ῎ηχθηματ ᾽ηχθ´εσθην b/hecοme νeχed w/῍./-lτ/th (+ d/fαt/τ.) .'

<p style="font-size:15px;color:#FFB5A4">This next function attempts to split an image classified as 'multi' into two sub-characters using two methods:</p>
<ul style="font-size:15px;color:#FFB5A4">
    <li>a shortest path from the estimated split point to the bottom of the image</li>
    <li>a straight line from that point</li></ul>
<br/>
<p style="font-size:15px;color:#FFB5A4">For each method, the top (argmax) probabilities of the two resulting letters are multiplied together to get a score; the split with the higher score is chosen as the correct one.</p>

In [12]:
def get_mult(imname):
    """Split a 'multi' component image into two sub-images using two different methods.

    The image is contrast-enhanced and saved as "<imname.name>_enhanced.jpg". The splitter
    model's prediction on that file is turned into a split position, and the enhanced image
    is then split twice:

    * with a straight cut, via split_parts (saved as md_l1.jpg / md_l2.jpg);
    * along a path obtained from get_sp / get_split_images (saved as sp_l1.jpg / sp_l2.jpg).

    Parameters
    ----------
    imname : path-like object exposing a ``.name`` attribute
        Image of the component to split (the notebook passes ``Path('temp.jpg')``).

    Returns
    -------
    tuple
        ``(('sp_l1.jpg', 'sp_l2.jpg'), (i1, i2))`` where ``i1, i2`` are the two values
        returned by split_parts for the names ``('md_l1.jpg', 'md_l2.jpg')``.

    Side effects
    ------------
    Writes the enhanced image and the four split images using relative file names, i.e. into
    the current working directory.
    """
    im = pil_im.open(imname)
    # raise the contrast (factor 4.0); the enhanced copy is what the splitter model sees
    enhancer = ImageEnhance.Contrast(im)
    enhanced_im = enhancer.enhance(4.0)
    # e.g. 'temp.jpg' -> 'temp.jpg_enhanced.jpg' (the original extension stays inside the name)
    newim_name = "{}_enhanced.jpg".format(imname.name)
    enhanced_im.save(newim_name)
    # NOTE(review): the [1][0][1] indexing depends on the splitter model's output layout — confirm
    ip = splitter_model.predict(open_image(newim_name))[1][0][1] # get estimated split point
    # split position as a fraction; split_parts multiplies it by the size of the last tensor axis.
    # Presumably the model predicts an offset from the centre, hence the .5 — TODO confirm
    frac = .5 + round(float(ip),2)
    i1, i2 = split_parts(open_image(newim_name), frac, ('md_l1.jpg','md_l2.jpg')) # gets straight line split
    
    im_arr = get_image_array(newim_name)
    # the graph helpers are given the transposed array
    G = get_line_graph(im_arr.T) 
    # start node: round(shape[0] * frac) along the first axis of the transposed array, times
    # shape[1], plus 1 — presumably matching get_line_graph's node numbering; TODO confirm
    source = round(im_arr.T.shape[0] * frac)*im_arr.T.shape[1] + 1 
    # NOTE(review): `sp` is never read afterwards — confirm whether this call is still needed
    sp = DijsktraSP(G, source) 
    # NOTE(review): `totalweight` is unused
    sp_inds, totalweight = get_sp(G, s=source) 
    # add the start node itself to the path's stack
    sp_inds.stack.append(source) 
    left,right = get_split_images(im_arr.T, sp_inds, 0, im_arr.T) # gets shortest path splits
    imwrite('sp_l1.jpg', left)
    imwrite('sp_l2.jpg', right)
    
    return ('sp_l1.jpg', 'sp_l2.jpg'), (i1, i2)


# gets the sub-letters generated by the two split methods with the higher score
def get_best_letters(sp_splits, mdl_splits):
    """Pick the better of the two split methods and return its two letter candidates.

    Each half of each split is classified with get_top_preds (top 2). A split's score is the
    product of the top probabilities of its two halves. The shortest-path split wins only
    when its score is strictly higher; otherwise the model (straight-line) split is used.

    Parameters
    ----------
    sp_splits : pair of image names produced by the shortest-path split
    mdl_splits : pair of image names produced by the straight-line split

    Returns
    -------
    tuple
        ``(first, second)`` - the get_letter result for each half of the winning split.
    """
    sp_preds = [get_top_preds(sp_splits[0], top=2), get_top_preds(sp_splits[1], top=2)]
    mdl_preds = [get_top_preds(mdl_splits[0], top=2), get_top_preds(mdl_splits[1], top=2)]

    sp_score = sp_preds[0][0][1] * sp_preds[1][0][1]
    mdl_score = mdl_preds[0][0][1] * mdl_preds[1][0][1]

    winner = sp_preds if sp_score > mdl_score else mdl_preds
    return get_letter(winner[0]), get_letter(winner[1])
    

# if the top probability is greater than 80% return just the top letter, else return the top two
def get_letter(ltr): 
    """Reduce a ranked prediction list to the letter candidates worth keeping.

    Parameters
    ----------
    ltr : sequence of (letter, probability) pairs, best prediction first

    Returns
    -------
    list
        ``[best]`` when the best probability is above .8, else ``[best, runner_up]``.
    """
    best_letter, best_prob = ltr[0][0], ltr[0][1]
    if best_prob > .8:
        return [best_letter]
    return [best_letter, ltr[1][0]]
    

# use the predicted split-point to return the two sub-characters
def split_parts(img, split_frac, names=None):
    """Cut an image into a left and a right part with a straight vertical split.

    The split column is ``int(size_of_last_axis * split_frac)`` of ``img.data``. The left part
    holds every column up to and including the split column, the right part starts at the
    split column, so that one column appears in both parts.

    Parameters
    ----------
    img : image object whose ``.data`` is a 3-axis tensor (the last axis is the one split)
    split_frac : float
        Position of the cut as a fraction of the last axis.
    names : pair of file names, optional
        When given, both parts are saved under these names instead of being returned.

    Returns
    -------
    ``(left_image, right_image)`` when ``names`` is None, otherwise ``names`` unchanged.
    """
    pixels = img.data
    # computed once from img.data so both halves are guaranteed to use the same column
    split_col = int(pixels.shape[2] * split_frac)
    left_im = pixels[:, :, :split_col + 1]
    right_im = pixels[:, :, split_col:]
    # identity check: `== None` would invoke __eq__ on whatever was passed as names
    if names is None:
        return fast_im(left_im), fast_im(right_im)
    fast_im(left_im).save(names[0])
    fast_im(right_im).save(names[1])
    return names


# if the two characters are within 10 pixels of each other, place them next to each other, else add a space
def add_letter(ltrs, lbound, rbound, max_gap=10, letter_map=None):
    """Render the candidate letters of one character, with a leading space after a wide gap.

    Parameters
    ----------
    ltrs : iterable of letter codes
        Candidates for this character; several candidates are joined with '/'.
    lbound : number
        Right pixel boundary of the preceding character.
    rbound : number
        Left pixel boundary of this character.
    max_gap : number, optional
        Largest gap (``rbound - lbound``) that still counts as the same word. Defaults to 10.
    letter_map : mapping, optional
        Maps a letter code to the character to output. Defaults to the notebook-level
        ``lm_inv`` mapping.

    Returns
    -------
    str
        The '/'-joined characters, prefixed with a single space when the gap exceeds max_gap.

    Raises
    ------
    KeyError
        If a letter code is missing from the mapping.
    """
    if letter_map is None:
        letter_map = lm_inv  # inverse letter map loaded earlier in the notebook
    prefix = ' ' if rbound - lbound > max_gap else ''
    return prefix + '/'.join(letter_map[code] for code in ltrs)