In [2]:
import os
import torch
import numpy as np
import gradio as gr
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel



In [12]:
class TrOCRInferencer:
    """Wrap a TrOCR processor/model pair behind a single ``inference`` call.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            processor and the model. Defaults to ``"ddobokki/ko-trocr"``,
            the checkpoint this notebook was written against.
    """

    def __init__(self, model_name="ddobokki/ko-trocr"):
        print("[info] init TrOCR Inferencer")
        # Processor and model are loaded from the same identifier so the
        # preprocessing/tokenizer always match the weights.
        self.processor = TrOCRProcessor.from_pretrained(model_name)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_name)

    def inference(self, image):
        """Return the text recognised in ``image``.

        Args:
            image: Image object forwarded to the processor as ``images=``.

        Returns:
            The first decoded string produced for the input, with special
            tokens stripped.
        """
        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values
        generated_ids = self.model.generate(pixel_values)
        decoded = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
        # A single image goes in, so only the first decoded entry is returned.
        return decoded[0]

In [13]:
# Build the inferencer once at module level; image_to_text reads this name.
inferencer = TrOCRInferencer()

[info] init TrOCR Inferencer




In [16]:
def image_to_text(image):
    """Run OCR on an image array handed over by a Gradio component.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``, or ``None`` when
            the input component holds no image.

    Returns:
        The string returned by ``inferencer.inference``, or ``""`` when
        ``image`` is ``None``.
    """
    if image is None:
        # Clicking "Convert" on an empty component would otherwise crash
        # inside Image.fromarray; report "no text" instead.
        return ""
    rgb_image = Image.fromarray(image).convert("RGB")
    return inferencer.inference(rgb_image)

In [18]:
class TrOCRInferencer:
    """Wrap a TrOCR processor/model pair behind a single ``inference`` call.

    NOTE(review): this class is also declared in an earlier cell of the
    notebook; this later declaration is the one in effect once the cell
    runs. Consider keeping a single definition.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            processor and the model. Defaults to ``"ddobokki/ko-trocr"``.
    """

    def __init__(self, model_name="ddobokki/ko-trocr"):
        print("[info] init TrOCR Inferencer")
        # Same identifier for both so preprocessing matches the weights.
        self.processor = TrOCRProcessor.from_pretrained(model_name)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_name)

    def inference(self, image):
        """Return the text recognised in ``image``.

        Args:
            image: Image object forwarded to the processor as ``images=``.

        Returns:
            The first decoded string, with special tokens stripped.
        """
        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values
        generated_ids = self.model.generate(pixel_values)
        decoded = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
        # One image in, so only the first decoded entry is meaningful.
        return decoded[0]
    
# Module-level instance used by image_to_text below. Constructing it calls
# from_pretrained for both the processor and the model.
inferencer = TrOCRInferencer()

def image_to_text(image):
    """Convert a Gradio image payload to recognised text.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``, or ``None`` when
            the input component holds no image.

    Returns:
        The string returned by ``inferencer.inference``, or ``""`` when
        ``image`` is ``None``.
    """
    if image is None:
        # Avoid the exception Image.fromarray raises on None when the user
        # presses "Convert" without providing an image.
        return ""
    rgb_image = Image.fromarray(image).convert("RGB")
    return inferencer.inference(rgb_image)

# Example images are resolved against the current working directory, so the
# notebook has to be started from the folder that contains "examples/".
example_names = [
    "Hello.png",
    "Hello_cursive.png",
    "Red.png",
    "sentence.png",
    "i_love_you.png",
    "merrychristmas.png",
    "Rock.png",
    "Bob.png",
]
example_paths = [
    os.path.join(os.getcwd(), f"examples/{name}") for name in example_names
]

with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")

    # Tab 1: OCR on an uploaded image file.
    with gr.Tab("Image upload"):
        upload_image = gr.Image(label="Handwritten image file")
        upload_output = gr.Textbox(label="Output Box")
        upload_btn = gr.Button("Convert")
        upload_btn.click(
            fn=image_to_text, inputs=upload_image, outputs=upload_output
        )
        gr.Markdown("## Image Examples")
        gr.Examples(
            examples=example_paths,
            inputs=upload_image,
            outputs=upload_output,
            fn=image_to_text,
        )

    # Tab 2: OCR on a freehand drawing. Components get their own names so the
    # first tab's textbox/button are not silently rebound.
    with gr.Tab("Drawing"):
        gr.Markdown("# Handwritten Image OCR")
        sketchpad = gr.Sketchpad(
            label="Handwritten Sketchpad",
            shape=(600, 300),
            brush_radius=3,
            invert_colors=False,
        )
        sketch_output = gr.Textbox(label="Output Box")
        sketch_btn = gr.Button("Convert")
        sketch_btn.click(
            fn=image_to_text, inputs=sketchpad, outputs=sketch_output
        )

# inline=False keeps the UI out of the notebook output; share=True additionally
# publishes a temporary public URL, so anyone with the link can reach the model.
app.launch(inline=False, share=True)

[info] init TrOCR Inferencer




Running on local URL:  http://127.0.0.1:7867
IMPORTANT: You are using gradio version 3.40.0, however version 4.29.0 is available, please upgrade.
--------
Running on public URL: https://bfebab5b4858dd7abe.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [9]:
# Shut down the server started by app.launch() once the demo is no longer needed.
app.close()

Closing server running on port: 7864
