# 한글 손글씨 인식 서비스 만들기

In [14]:
import os
import numpy as np
from PIL import Image
import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, AutoTokenizer
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, AutoTokenizer
import requests 
import unicodedata
from io import BytesIO
from PIL import Image

In [3]:
import torch

# Report whether PyTorch can see a CUDA device in this environment.
cuda_available = torch.cuda.is_available()
print(cuda_available)

False


In [None]:
from io import BytesIO

from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import requests

# Download a sample handwritten line from the IAM database.
url = 'https://fki.tic.heia-fr.ch/static/img/a01-122-02-00.jpg'
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting PIL fail
# later while trying to decode an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
image = Image.open(BytesIO(response.content)).convert("RGB")

# Load the English handwritten TrOCR checkpoint and preprocess the image into
# the tensor the model consumes.
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-large-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-large-handwritten')
pixel_values = processor(images=image, return_tensors="pt").pixel_values

# Generate token ids and decode the first (only) sequence back to text.
generated_ids = model.generate(pixel_values)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Show the result; previously the decoded text was computed and then discarded.
print(generated_text)


In [None]:
import torch
from transformers import VisionEncoderDecoderModel

# Load the small Korean TrOCR checkpoint.
korean_checkpoint = "team-lucid/trocr-small-korean"
model = VisionEncoderDecoderModel.from_pretrained(korean_checkpoint)

# Feed a random tensor of shape (1, 3, 384, 384) instead of a real image, just
# to exercise generate().
dummy_shape = (1, 3, 384, 384)
pixel_values = torch.rand(*dummy_shape)
generated_ids = model.generate(pixel_values)


In [None]:
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, AutoTokenizer
import requests 
import unicodedata
from io import BytesIO
from PIL import Image

# Load the Korean TrOCR checkpoint: image processor, model and tokenizer.
processor = TrOCRProcessor.from_pretrained("ddobokki/ko-trocr") 
model = VisionEncoderDecoderModel.from_pretrained("ddobokki/ko-trocr")
tokenizer = AutoTokenizer.from_pretrained("ddobokki/ko-trocr")

# Download the sample image. The timeout prevents an indefinite hang and
# raise_for_status() reports HTTP failures before PIL tries to decode the body.
url = "https://raw.githubusercontent.com/ddobokki/ocr_img_example/master/g.jpg"
response = requests.get(url, timeout=30)
response.raise_for_status()
# Convert to RGB so the input matches what the other cells in this notebook
# feed the processor, whatever mode the downloaded file happens to be in.
img = Image.open(BytesIO(response.content)).convert("RGB")

# Preprocess, generate token ids (capped at 64 tokens) and decode to text.
pixel_values = processor(img, return_tensors="pt").pixel_values 
generated_ids = model.generate(pixel_values, max_length=64)
generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Normalize to NFC so the decoded text is in composed Unicode form.
generated_text = unicodedata.normalize("NFC", generated_text)
print(generated_text)


In [15]:
class TrOCRInferencer:
    """Bundle a TrOCR processor, model and tokenizer behind one OCR call.

    Args:
        model_name: Hugging Face checkpoint to load for the processor, the
            model and the tokenizer. Defaults to ``"ddobokki/ko-trocr"``.
        max_length: ``max_length`` passed to ``model.generate`` on every
            ``inference`` call. Defaults to 64.
    """

    def __init__(self, model_name="ddobokki/ko-trocr", max_length=64):
        print("[info] init TrOCR Inferencer")
        self.max_length = max_length
        self.processor = TrOCRProcessor.from_pretrained(model_name)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def inference(self, image):
        """Recognize the text in ``image`` and return it as an NFC string.

        Args:
            image: Image handed to the processor as ``images=``; the caller
                in this file passes an RGB ``PIL.Image``.

        Returns:
            The first decoded sequence with special tokens removed,
            normalized to Unicode NFC.
        """
        pixel_values = self.processor(images=image, return_tensors='pt').pixel_values
        generated_ids = self.model.generate(pixel_values, max_length=self.max_length)
        generated_text = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        # Normalize to NFC so the returned text is in composed Unicode form.
        return unicodedata.normalize("NFC", generated_text)

# Build one inferencer at module level; image_to_text() below calls this same
# instance each time it runs.
inferencer = TrOCRInferencer()

def image_to_text(image):
    """Run OCR on an image delivered by a Gradio component.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``, or ``None`` when
            no image has been provided.

    Returns:
        The text recognized by the module-level ``inferencer``, or an empty
        string when ``image`` is ``None``.
    """
    # Clicking "Convert" with nothing uploaded or drawn can deliver None;
    # Image.fromarray(None) would raise, so return empty text instead.
    if image is None:
        return ""
    rgb_image = Image.fromarray(image).convert('RGB')
    return inferencer.inference(rgb_image)
# Two-tab OCR demo: one tab for uploaded images (with clickable examples) and
# one for drawing on a sketchpad. Both tabs route through image_to_text().
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")
    with gr.Tab("Image upload"):
        image = gr.Image(label="Handwritten image file")
        output = gr.Textbox(label="Output Box")
        convert_btn = gr.Button("Convert")
        convert_btn.click(
            fn=image_to_text, inputs=image, outputs=output
        )
        gr.Markdown("## Image Examples")
        gr.Examples(
            # Example images are resolved relative to the current working
            # directory, under "examples/".
            examples=[
                os.path.join(os.getcwd(), f"examples/{file_name}")
                for file_name in (
                    "Hello.png",
                    "Hello_cursive.png",
                    "Red.png",
                    "sentence.png",
                    "i_love_you.png",
                    "merrychristmas.png",
                    "Rock.png",
                    "Bob.png",
                )
            ],
            inputs=image,
            outputs=output,
            fn=image_to_text,
        )
    with gr.Tab("Drawing"):
        gr.Markdown("# Handwritten Image OCR")
        sketchpad = gr.Sketchpad(
            label="Handwritten Sketchpad",
            shape=(600, 300),
            brush_radius=3,
            invert_colors=False,
        )
        # `output` and `convert_btn` are rebound for this tab; the upload
        # tab's click handler was already wired to its own components above.
        output = gr.Textbox(label="Output Box")
        convert_btn = gr.Button("Convert")
        convert_btn.click(
            fn=image_to_text, inputs=sketchpad, outputs=output
        )
# share=True also publishes a temporary public URL in addition to the local
# one; inline=False keeps the UI from being embedded in the notebook output.
app.launch(inline=False, share=True)

[info] init TrOCR Inferencer




Running on local URL:  http://127.0.0.1:7871
IMPORTANT: You are using gradio version 3.40.0, however version 4.29.0 is available, please upgrade.
--------
Running on public URL: https://354061f7d5e6fb531c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




-----------------------------------------------------------------------------

In [1]:
import gradio as gr
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Configure the Image Embedder: L2-normalize and quantize the embeddings, and
# load the model from the local 'embedder.tflite' file.
l2_normalize = True
quantize = True
base_options = python.BaseOptions(model_asset_path='embedder.tflite')
options = vision.ImageEmbedderOptions(
    base_options=base_options,
    l2_normalize=l2_normalize,
    quantize=quantize,
)

# Create the Image Embedder from the options above.
embedder = vision.ImageEmbedder.create_from_options(options)

# Similarity function used as the Gradio callback.
def compare_images(image1, image2):
    """Return a message with the cosine similarity of two images.

    Args:
        image1: First image as delivered by Gradio, or ``None`` if missing.
        image2: Second image as delivered by Gradio, or ``None`` if missing.

    Returns:
        A prompt asking for both images when either one is ``None``;
        otherwise a string containing the similarity to four decimals.
    """
    # Identity checks only: `None in (a, b)` would compare arrays with `==`.
    if any(pixels is None for pixels in (image1, image2)):
        return "이미지를 모두 업로드해주세요."

    # Wrap both inputs as MediaPipe images first, then embed each of them.
    mp_images = [
        mp.Image(image_format=mp.ImageFormat.SRGB, data=pixels)
        for pixels in (image1, image2)
    ]
    results = [embedder.embed(mp_image) for mp_image in mp_images]

    # Compare the first embedding of each result.
    similarity = vision.ImageEmbedder.cosine_similarity(
        results[0].embeddings[0],
        results[1].embeddings[0],
    )
    return f"두 이미지의 유사도: {similarity:.4f}"

# Build the Gradio interface: two numpy image inputs feeding compare_images,
# with the result shown as text.
image_inputs = [
    gr.Image(label="첫 번째 이미지 업로드", type="numpy"),
    gr.Image(label="두 번째 이미지 업로드", type="numpy"),
]
iface = gr.Interface(
    fn=compare_images,
    inputs=image_inputs,
    outputs="text",
    title="이미지 유사도 비교",
    description="두 이미지를 업로드하여 유사도를 비교하세요.",
)

# Start the local web server for the interface.
iface.launch()


  from .autonotebook import tqdm as notebook_tqdm
I0000 00:00:1727148297.757869   30425 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1727148297.779019   31148 gl_context.cc:357] GL version: 3.1 (OpenGL ES 3.1 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: D3D12 (AMD Radeon(TM) Graphics)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




IMPORTANT: You are using gradio version 3.40.0, however version 4.29.0 is available, please upgrade.
--------


