In [1]:
# Inject a CSS override so the notebook container spans the full browser width.
# NOTE(review): newer IPython releases expose `display`/`HTML` from `IPython.display`;
# the `IPython.core.display` path looks like the legacy location - confirm before upgrading.
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
# NOTE(review): this `Image` is rebound by the later `Image` imports (PIL, then fastai.vision)
# in the next cell, so only the last of the three is reachable by that name.
from IPython.display import Image 
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load the autoreload extension and set it to mode 2, so edits to the project
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import config
from copy import deepcopy
import numpy as np
from PIL import Image
from imageio import imwrite
from fastai.core import Path
from fastai.vision import load_learner, open_image, Image
from time import time

In [3]:
from image_graph import *
from line_bounds import *
from pred_handler import get_top_preds

In [4]:
# Base name (without extension) of the page scan to split into lines.
pic_name = 'GK_RDR_PG3_2'

# Folder holding the page scans, and the folder the extracted line images go to.
im_path = Path('../greek_pages/page_images/')
line_dest = Path('../greek_pages/line_images/')
# Create the output folder up front so writing the line images later cannot
# fail on a fresh checkout where the folder does not exist yet.
line_dest.mkdir(parents=True, exist_ok=True)
im_name = pic_name + '.jpeg'

# Load the exported learners. The first is stored on the shared `config` module
# (presumably so the imported project modules can reach it - verify in `config`).
model_name = 'rn_34.pkl'
config.model = load_learner('../models/', model_name)
# NOTE(review): `splitter_model` is not referenced by the cells shown here;
# kept because later cells may rely on it.
splitter_model = load_learner('../models/', 'split_model.pkl')

# Page image as an array, plus the running index used to name the line files.
img_arr = get_image_array(im_path/im_name)
imnum = 0

<p style="font-size:13px;color:#FFB5A4">Get the rows found at the peaks of the pixel-sum graph. Each peak roughly represents a line split; see <code>get_line_splits</code> for details.</p>

In [5]:
# Candidate split rows for the whole page; the loop further down consumes these
# pairwise and recomputes them after every line it cuts off.
vchanges = get_line_splits(img_arr)

In [6]:
# Peek at the first five candidate split rows instead of dumping the whole list.
vchanges[:5]

[83, 179, 253, 301, 374]

<p style="font-size:13px;color:#FFB5A4">While more than one candidate line split remains, build a graph of the band between the first two candidate splits and find the shortest path through it from the first column to the last column. Use this path to separate the top line from the rest of the page, store that line in its own image to be processed by the letter parser, and recompute the line splits for the rest of the page.</p>
<br/>
<p style="font-size:14px;color:#FFB5A4">NB: sometimes an approximate line split does not contain an actual line split, so we cycle through the candidate splits until we find one whose shortest path has a weight of at most 50 (note that pixel values are inverted here to fit the shortest-path representation).</p>
<br/>
<p style="font-size:13px;color:#FFB5A4">This method is currently too slow for the final application (a little over 2 minutes for half a page of text - see below), but it will work for the first version. Future improvements include processing the lines in parallel and improving the way the image graphs are created, since graph creation is the main consumer of time.</p>

In [7]:
# Largest shortest-path weight still accepted as a genuine line split.
# The image passed should be high-contrast, so white pixels should have a low
# value and a path through the gap between two lines should be cheap.
MAX_SPLIT_WEIGHT = 50

start = time()
while len(vchanges) > 1:
    # Walk consecutive pairs of candidate rows until the band between one pair
    # contains a cheap enough path. `vchanges` is not recomputed during this
    # scan: `img_arr` only changes after a split has actually been made.
    for i in range(len(vchanges) - 1):
        j = i + 1
        top, bottom = vchanges[i], vchanges[j]
        sub_array = img_arr[top:bottom,:]
        G = get_line_graph(sub_array)
        sp_inds, totalweight = get_sp(G)
        if totalweight <= MAX_SPLIT_WEIGHT:
            break
    else:
        # Every candidate pair was rejected. The unbounded scan used before ran
        # off the end of `vchanges` with an IndexError here; stop cleanly instead
        # so the timing below is still recorded and written lines are kept.
        print('No line split with path weight <= {} found; stopping before line_{}.'.format(MAX_SPLIT_WEIGHT, imnum))
        break

    # Cut the page along the path: the top part is the next line, the bottom
    # part is what remains to be processed.
    top_im, bot_im = get_split_images(sub_array, sp_inds, top, img_arr)

    # Both halves are transposed before being written / fed back into the loop.
    line_one = np.array(top_im).T
    line_two = np.array(bot_im).T
    imwrite(line_dest/'line_{}.jpg'.format(imnum),line_one)

    # Continue with the remainder of the page below the line just written.
    img_arr = np.asarray(line_two)
    imnum += 1
    vchanges = get_line_splits(img_arr)
# NOTE(review): whatever is left in `img_arr` when the loop ends is not written
# by this cell - confirm whether the last line of the page should be saved too.
stop = time()

In [8]:
# Elapsed time of the line-splitting loop, converted from seconds to minutes.
(stop - start) / 60

2.34203603665034

<p style="font-size:13px;color:#FFB5A4">This method works very well. The only issue is with accents and diacritics, which sometimes end up on the line above instead of on their own line. This will be handled during the character parsing of the lines.</p>
<br/>
<p style="font-size:13px;color:#FFB5A4">Here is an example of the first two lines generated by the following image:</p>
<img src="../greek_pages/page_images/GK_RDR_PG3_2.jpeg" style="width: 400px; height: auto;" />
<br/>
<br/>
<img src="../greek_pages/line_images/line_0.jpg" style="width: 700px; height: auto;" />
<img src="../greek_pages/line_images/line_1.jpg" style="width: 700px; height: auto;" />