In [1]:
from google.cloud import vision
from google.cloud.vision import types
import io
from PIL import Image, ImageDraw
from enum import Enum
import os
import uuid
import glob
import time 


In [2]:
# make sure you have "data" directory in parallel to "notebooks". Create "input" directory under
# "data" directory and copy sample image file.
# application creates "output" programmatically.
# ----+/notebooks
# ----+/data
# ----------+/input

base_dir                   = '/Users/kd/Workspace/python/github/handwriting-recognition'
data_dir                   = 'data'
input_data_dir             = 'input'
output_data_dir            = 'output'

output_extracted_tables_dir  = 'tables'
output_extracted_boxes_dir   = 'boxes'
output_extracted_letters_dir = 'letters'

input_filename               = 'sample_input_02.jpg'

In [3]:
# utility function
def create_directory(path):
    try:
        os.mkdir(path)
        return True
    except FileExistsError as fe_error:
        return True
    except OSError as error:
        print(error)
    return False

# read files present in a directory
def read_directory_files(path, pattern='*'):
    files = [f for f in glob.glob(os.path.join(path, pattern))]
    return files

def get_subdirectories(path):
    return [f.path for f in os.scandir(output_boxes_dir) if f.is_dir() ] 

def show_img(img):
    plt.axis('off')
    plt.figure(figsize=(10,10))
    plt.imshow(img);

In [4]:
def ocr_from_google_vision(client, filepath):
    with io.open(filepath, 'rb') as image_file1:
            content = image_file1.read()
    content_image = types.Image(content=content)
    response = client.document_text_detection(image=content_image)
    document = response.full_text_annotation
    return document.text

In [5]:
# program initialization 
img_filename    = os.path.join(base_dir, data_dir, input_data_dir, input_filename)
print("input filename : [%s]" % (img_filename))

processing_basedir  = os.path.join(base_dir, data_dir, output_data_dir, os.path.splitext(input_filename)[0])
print("processing dir: [%s]" % (processing_basedir))

output_tables_dir = os.path.join(processing_basedir, output_extracted_tables_dir)
print("tables dir: [%s]" % (output_tables_dir))

output_boxes_dir = os.path.join(processing_basedir, output_extracted_boxes_dir)
print("boxes dir: [%s]" % (output_boxes_dir))

output_letters_dir = os.path.join(processing_basedir, output_extracted_letters_dir)
print("letters: [%s]" % (output_letters_dir))

input filename : [/Users/kd/Workspace/python/github/handwriting-recognition/data/input/sample_input_02.jpg]
processing dir: [/Users/kd/Workspace/python/github/handwriting-recognition/data/output/sample_input_02]
tables dir: [/Users/kd/Workspace/python/github/handwriting-recognition/data/output/sample_input_02/tables]
boxes dir: [/Users/kd/Workspace/python/github/handwriting-recognition/data/output/sample_input_02/boxes]
letters: [/Users/kd/Workspace/python/github/handwriting-recognition/data/output/sample_input_02/letters]


In [10]:
client     = vision.ImageAnnotatorClient()
boxes_dirs = get_subdirectories(output_boxes_dir)
gvision_output = []

for boxes_dir in boxes_dirs:
    boxes_files = read_directory_files(boxes_dir)
    
    for file in boxes_files:
        text = ocr_from_google_vision(client, file)
        gvision_output.append([os.path.basename(boxes_dir), os.path.basename(file), text.strip('\n')])
        print("table: [%s], boxes: [%s], text: [%s]" % (os.path.basename(boxes_dir), os.path.basename(file), text.strip('\n')))
        time.sleep(1)
        


table: [0_0_sample_input_02], boxes: [8_3_0_0_sample_input_02.jpg], text: []
table: [0_0_sample_input_02], boxes: [6_1_0_0_sample_input_02.jpg], text: []
table: [0_0_sample_input_02], boxes: [5_0_0_0_sample_input_02.jpg], text: []
table: [0_0_sample_input_02], boxes: [0_3_0_0_sample_input_02.jpg], text: [અધ્યયન નિષ્પતિ ક્રમ]
table: [0_0_sample_input_02], boxes: [3_2_0_0_sample_input_02.jpg], text: [0과]
table: [0_0_sample_input_02], boxes: [2_4_0_0_sample_input_02.jpg], text: [to]
table: [0_0_sample_input_02], boxes: [5_4_0_0_sample_input_02.jpg], text: []
table: [0_0_sample_input_02], boxes: [4_2_0_0_sample_input_02.jpg], text: [02]
table: [0_0_sample_input_02], boxes: [9_1_0_0_sample_input_02.jpg], text: []
table: [0_0_sample_input_02], boxes: [7_3_0_0_sample_input_02.jpg], text: []
table: [0_0_sample_input_02], boxes: [2_0_0_0_sample_input_02.jpg], text: [04]
table: [0_0_sample_input_02], boxes: [10_0_0_0_sample_input_02.jpg], text: []
table: [0_0_sample_input_02], boxes: [1_1_0_0_sa

In [11]:
gvision_output

[['0_0_sample_input_02', '8_3_0_0_sample_input_02.jpg', ''],
 ['0_0_sample_input_02', '6_1_0_0_sample_input_02.jpg', ''],
 ['0_0_sample_input_02', '5_0_0_0_sample_input_02.jpg', ''],
 ['0_0_sample_input_02', '0_3_0_0_sample_input_02.jpg', 'અધ્યયન નિષ્પતિ ક્રમ'],
 ['0_0_sample_input_02', '3_2_0_0_sample_input_02.jpg', '0과'],
 ['0_0_sample_input_02', '2_4_0_0_sample_input_02.jpg', 'to'],
 ['0_0_sample_input_02', '5_4_0_0_sample_input_02.jpg', ''],
 ['0_0_sample_input_02', '4_2_0_0_sample_input_02.jpg', '02'],
 ['0_0_sample_input_02', '9_1_0_0_sample_input_02.jpg', ''],
 ['0_0_sample_input_02', '7_3_0_0_sample_input_02.jpg', ''],
 ['0_0_sample_input_02', '2_0_0_0_sample_input_02.jpg', '04'],
 ['0_0_sample_input_02', '10_0_0_0_sample_input_02.jpg', ''],
 ['0_0_sample_input_02', '1_1_0_0_sample_input_02.jpg', '୨୦'],
 ['0_0_sample_input_02', '4_1_0_0_sample_input_02.jpg', '04'],
 ['0_0_sample_input_02', '7_0_0_0_sample_input_02.jpg', ''],
 ['0_0_sample_input_02', '9_2_0_0_sample_input_02.jpg