<a href="https://colab.research.google.com/github/rafia9005/WriteVision/blob/main/jp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

init project

In [3]:
import os
import argparse
import logging
import requests
import base64
from PIL import Image
from io import BytesIO
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

In [8]:
# setup logger
def setup_logger():
  logger = logging.getLogger("WritterVision")
  logger.setLevel(logging.INFO)

  handler = logging.StreamHandler()
  formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
  handler.setFormatter(formatter)
  logger.addHandler(handler)
  return logger

In [9]:
# load model
logger = setup_logger()

def load_model(model_directory="./model"):
    """
    Load model and processor. If a local model exists in the specified directory, load from there.
    Otherwise, download from Hugging Face and save locally.
    """
    try:
      if os.path.exists(model_directory) and os.path.isdir(model_directory):
        processor = TrOCRProcessor.from_pretrained(model_directory)
        model = VisionEncoderDecoderModel.from_pretrained(model_directory)
        logger.info("Model and processor loaded from local directory.")
      else:
        logger.info("Local model directory not found. Downloading model and processor from Hugging Face.")
        processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
        model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
        model.save_pretrained(model_directory)
        processor.save_pretrained(model_directory)
        logger.info("Model and processor downloaded and saved locally.")

      return processor, model
    except Exception as e:
      logger.error(f"Error loading model and processor: {e}")
      raise

In [17]:
def load_image(input_type: str, source: str):
    if input_type == "url":
        return load_image_from_url(source)
    elif input_type == "path":
        return load_image_from_path(source)
    elif input_type == "base64":
        return load_image_from_base64(source)
    else:
        error_msg = "Invalid input type. Use 'url', 'path', or 'base64'."
        logger.error(error_msg)
        raise ValueError(error_msg)

def load_image_from_url(url: str):
    try:
        logger.info(f"Loading image from URL: {url}")
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Memeriksa status HTTP
        image = Image.open(response.raw).convert("RGB")
        logger.info("Image successfully loaded from URL.")
        return image
    except Exception as e:
        error_msg = f"Failed to load image from URL: {e}"
        logger.error(error_msg)
        raise Exception(error_msg)

def load_image_from_path(path: str):
    try:
        logger.info(f"Loading image from path: {path}")
        image = Image.open(path).convert("RGB")
        logger.info("Image successfully loaded from path.")
        return image
    except Exception as e:
        error_msg = f"Failed to load image from path: {e}"
        logger.error(error_msg)
        raise Exception(error_msg)

def load_image_from_base64(base64_str: str):
    try:
        logger.info("Loading image from base64 string.")
        image_data = base64.b64decode(base64_str)
        image = Image.open(BytesIO(image_data)).convert("RGB")
        logger.info("Image successfully loaded from base64.")
        return image
    except Exception as e:
        error_msg = f"Failed to load image from base64: {e}"
        logger.error(error_msg)
        raise Exception(error_msg)

In [11]:
# utils configure
def print_result(generated_text: str):
    logger.info("Generated Text:")
    print("-" * 40)
    print(generated_text)
    print("-" * 40)

def validate_generated_text(text: str):
    if not text.strip():
        logger.warning("Generated text is empty.")
        return False
    if len(text) > 500:  # Arbitrary limit
        logger.warning("Generated text is too long.")
        return False
    logger.info("Generated text passed validation.")
    return True

In [18]:
def main():
    input_type = input("Enter input type ('url', 'path', or 'base64'): ").strip().lower()
    source = input("Enter the source (URL, file path, or base64 string): ").strip()

    try:
        logger.info("Loading image...")
        image = load_image(input_type, source)

        logger.info("Loading model...")
        processor, model = load_model()

        logger.info("Processing image...")
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        if validate_generated_text(generated_text):
            print_result(generated_text)
        else:
            logger.warning("Generated text failed validation.")
    except Exception as e:
        logger.error(f"An error occurred: {e}")

main()

Enter input type ('url', 'path', or 'base64'): url
Enter the source (URL, file path, or base64 string): https://asset-2.tstatic.net/jogja/foto/bank/images/tulisan-tangan-mirip-komputer_20151008_113306.jpg


2025-01-18 16:36:01,932 - WritterVision - INFO - Loading image...
2025-01-18 16:36:01,932 - WritterVision - INFO - Loading image...
INFO:WritterVision:Loading image...
2025-01-18 16:36:01,936 - WritterVision - INFO - Loading image from URL: https://asset-2.tstatic.net/jogja/foto/bank/images/tulisan-tangan-mirip-komputer_20151008_113306.jpg
2025-01-18 16:36:01,936 - WritterVision - INFO - Loading image from URL: https://asset-2.tstatic.net/jogja/foto/bank/images/tulisan-tangan-mirip-komputer_20151008_113306.jpg
INFO:WritterVision:Loading image from URL: https://asset-2.tstatic.net/jogja/foto/bank/images/tulisan-tangan-mirip-komputer_20151008_113306.jpg
2025-01-18 16:36:02,638 - WritterVision - INFO - Image successfully loaded from URL.
2025-01-18 16:36:02,638 - WritterVision - INFO - Image successfully loaded from URL.
INFO:WritterVision:Image successfully loaded from URL.
2025-01-18 16:36:02,642 - WritterVision - INFO - Loading model...
2025-01-18 16:36:02,642 - WritterVision - INFO - 

----------------------------------------
Lancashire County Courthouse
----------------------------------------
