# In [4]:
# src: 01_ocr_vision.ipynb
import io, json
from google.cloud import vision
from PIL import Image, ImageDraw
import os


# Vision API client created once at module load and reused by
# ocr_image_to_json for every request.
# NOTE(review): presumably relies on Google credentials being configured in
# the environment before this cell runs -- confirm.
client = vision.ImageAnnotatorClient()


def ocr_image_to_json(image_path, out_json_path=None):
    """Run Cloud Vision document text detection on an image file.

    The image bytes are sent through the module-level ``client``. The
    response's full-text annotation is flattened into a plain dict and,
    optionally, dumped to disk as UTF-8 JSON.

    Args:
        image_path: Path of the image file to read and submit for OCR.
        out_json_path: Optional destination for the JSON dump. When given,
            a missing parent directory is created before writing. When
            falsy, nothing is written.

    Returns:
        A dict with three keys:
            ``'image_path'``: the ``image_path`` argument as passed.
            ``'text'``: ``full_text_annotation.text`` from the response.
            ``'words'``: one ``{'text': ..., 'bbox': [(x, y), ...]}`` dict
                per word, in page / block / paragraph iteration order.
                ``'text'`` is the word's symbols joined together and
                ``'bbox'`` holds the word's bounding-box vertices.
        The bbox tuples are serialized as JSON arrays in the written file.

    Raises:
        RuntimeError: If the API response carries an error message.
        OSError: If the image cannot be read or the JSON cannot be written.
    """
    with open(image_path, 'rb') as f:
        content = f.read()

    image = vision.Image(content=content)
    response = client.document_text_detection(image=image)
    if response.error.message:
        # RuntimeError is still caught by callers handling ``Exception``,
        # and the message now says which image failed.
        raise RuntimeError(
            f'Vision API error for {image_path!r}: {response.error.message}'
        )

    annotation = response.full_text_annotation

    # Flatten the page -> block -> paragraph -> word hierarchy into one list.
    words = []
    for page in annotation.pages:
        for block in page.blocks:
            for para in block.paragraphs:
                for word in para.words:
                    words.append({
                        'text': ''.join(s.text for s in word.symbols),
                        'bbox': [
                            (v.x, v.y) for v in word.bounding_box.vertices
                        ],
                    })

    payload = {
        'image_path': image_path,
        'text': annotation.text,
        'words': words,
    }

    if out_json_path:
        # Create the output directory on demand so a fresh checkout does not
        # fail on the first write. dirname is '' for a bare file name, and
        # makedirs('') would raise, hence the guard.
        parent = os.path.dirname(out_json_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(out_json_path, 'w', encoding='utf-8') as fw:
            json.dump(payload, fw, ensure_ascii=False, indent=2)
    return payload


# Visualization helper
def draw_bboxes(image_path, words, out_path=None, scale=1.0):
    """Outline each word's bounding polygon in red on the loaded image.

    Args:
        image_path: Path of the image to open; it is converted to RGB
            before drawing.
        words: Iterable of dicts, each with a ``'bbox'`` entry holding a
            sequence of (x, y) vertices, e.g. the ``'words'`` list returned
            by ``ocr_image_to_json``.
        out_path: Optional file path. When truthy, the annotated image is
            saved there.
        scale: Accepted but not applied anywhere in this function.
            NOTE(review): decide whether it should scale the coordinates
            or the image, or be removed.

    Returns:
        The annotated image object.

    Raises:
        KeyError: If an entry in ``words`` has no ``'bbox'`` key.
    """
    img = Image.open(image_path).convert('RGB')
    draw = ImageDraw.Draw(img)
    for w in words:
        bbox = w['bbox']
        if not bbox:
            # Missing (None) or empty box: nothing to draw.
            continue
        try:
            # tuple(v) raises TypeError when a vertex is None; a malformed
            # coordinate list is rejected by draw.polygon.
            draw.polygon([tuple(v) for v in bbox], outline='red')
        except (TypeError, ValueError):
            # Best-effort visualization: skip boxes that cannot be drawn,
            # but let unrelated errors surface instead of hiding them.
            continue
    if out_path:
        img.save(out_path)
    return img


# Example use:
# payload = ocr_image_to_json('data/raw_images/sample.jpg', 'data/ocr_jsons/sample.json')
# img = draw_bboxes(payload['image_path'], payload['words'], out_path='visualized.jpg')