In [None]:
!pip install doclayout-yolo deskew

In [None]:
import os
import json
import sys
import time
from doclayout_yolo import YOLOv10
import torch
import cv2
from deskew import determine_skew

In [None]:
# Report whether PyTorch can see a CUDA device (i.e. whether GPU inference is possible)
print("CUDA Available:", torch.cuda.is_available())

In [None]:
def deskew(src_img_path):
    """
    Load an image from disk and straighten it when a skew angle is measured.

    Args:
        src_img_path: Path of the image file, handed to ``cv2.imread``.

    Returns:
        The image exactly as loaded when ``determine_skew`` reports no angle,
        or an angle whose magnitude does not exceed 0.01. Otherwise a rotated
        copy with the same width and height as the input, where areas exposed
        by the rotation are filled with the constant value (255, 255, 255).

    Raises:
        cv2.error: If ``cv2.imread`` could not load the file.
    """
    source = cv2.imread(src_img_path)
    if source is None:
        raise cv2.error(f"Could not read image file: {src_img_path}")

    # Skew is estimated on a single-channel version of the image.
    skew_angle = determine_skew(cv2.cvtColor(source, cv2.COLOR_BGR2GRAY))

    if skew_angle is None:
        print("   -> No skew detected.")
        return source

    if abs(skew_angle) > 0.01:
        print(f"   -> Detected angle: {skew_angle:.2f} degrees")
        height, width = source.shape[0], source.shape[1]
        # Rotate about the image centre without scaling.
        rotation = cv2.getRotationMatrix2D(
            (width // 2, height // 2), skew_angle, 1.0
        )
        # The output canvas keeps the input size.
        return cv2.warpAffine(
            source,
            rotation,
            (width, height),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255)
        )

    print("   -> No significant skew detected.")
    return source

In [None]:
# Placeholders: set these to the image folder and the JSON destination folder.
INPUT_DIR = ""
JSON_OUTPUT_DIR = ""

# One row per class index predicted by the model:
# (model class index, output category id, output category name).
# An output id of None marks a class whose detections are dropped in process().
_CLASS_TABLE = (
    (0, 2, "Title"),
    (1, 1, "Text"),
    (2, None, "Abandon"),
    (3, 5, "Figure"),
    (4, 1, "Text"),
    (5, 4, "Table"),
    (6, 1, "Text"),
    (7, 1, "Text"),
    (8, 1, "Text"),
    (9, 1, "Text"),
)

category_mapping = {
    model_cls: {"id": out_id, "name": out_name}
    for model_cls, out_id, out_name in _CLASS_TABLE
}

In [None]:
def print_flush(x):
    """Print ``x`` to standard output and flush the stream straight away."""
    print(x, flush=True)

In [None]:
def process(img_filename, img_path, model, json_dir, device=None):
    """
    Deskew one image, run layout detection on it and save the result as JSON.

    Args:
        img_filename: File name recorded in the JSON and used (minus its
            extension) to name the output file.
        img_path: Path of the image to load.
        model: Detection model exposing ``predict``; the first element of its
            result must provide ``boxes`` with ``cls`` and ``xyxy`` entries.
        json_dir: Directory the JSON file is written to.
        device: Device string forwarded to ``model.predict``. When None, "0"
            is used if CUDA is available and "cpu" otherwise, so the function
            also works on machines without a GPU.

    Raises:
        cv2.error: Propagated from ``deskew`` when the image cannot be read.
        KeyError: If the model predicts a class index that is missing from
            ``category_mapping``.
    """
    if device is None:
        device = "0" if torch.cuda.is_available() else "cpu"

    deskewed_image = deskew(img_path)
    # Detect on the deskewed pixels; previously the deskew result was computed
    # and then ignored because the original path was passed to the model.
    # deskew() keeps the image size, so box coordinates stay within the
    # original width/height, but they refer to the rotated image.
    # NOTE(review): relies on model.predict accepting an in-memory BGR array,
    # as Ultralytics-style predictors do - confirm for doclayout_yolo.
    det_res = model.predict(
        deskewed_image,
        imgsz=1024,
        conf=0.2,
        device=device,
        verbose=False
    )

    annotations = []
    for box in det_res[0].boxes:
        category = category_mapping[int(box.cls[0])]

        # Classes mapped to an id of None are not exported.
        if category["id"] is None:
            continue

        # Convert corner format (x1, y1, x2, y2) to [x, y, width, height].
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        annotations.append({
            "bbox": [
                round(x1, 2),
                round(y1, 2),
                round(x2 - x1, 2),
                round(y2 - y1, 2)
            ],
            "category_id": category["id"],
            "category_name": category["name"]
        })

    save_json_file({
        "file_name": img_filename,
        "annotations": annotations
    }, json_dir)


In [None]:
def save_json_file(data, out_path):
    """
    Write ``data`` as indented JSON into the directory ``out_path``.

    The output file is named after ``data["file_name"]`` with its final
    extension replaced by ``.json``. An existing file of that name is
    overwritten.
    """
    stem, _extension = os.path.splitext(data["file_name"])
    target = os.path.join(out_path, stem + ".json")

    with open(target, 'w') as handle:
        json.dump(data, handle, indent=2)


In [None]:
model = YOLOv10("")  # Point to your model
print_flush("getting files\n")
# Sort for a reproducible order and keep regular files only: a sub-directory
# cannot be read as an image and would abort the whole run inside deskew().
files = sorted(
    name for name in os.listdir(INPUT_DIR)
    if os.path.isfile(os.path.join(INPUT_DIR, name))
)
count = len(files)
# Create output directory if it doesn't exist
os.makedirs(JSON_OUTPUT_DIR, exist_ok=True)

# Lines written per iteration: five from the status print below plus the
# single line deskew() prints on every call. Rewinding by fewer lines leaves
# a stale "Elapsed" line behind on each iteration.
STATUS_LINES = 6

start = time.perf_counter()
for i, img_filename in enumerate(files):
    now = time.perf_counter() - start
    # At this point exactly i images are finished and count - i remain,
    # including the one about to be processed.
    per_img = now / i if i else 0.0
    eta = per_img * (count - i)
    print(
f"""Elapsed     : {(now-now%60)/60:2.0f}m:{(now%60):2.3f}s
time per img: {per_img*1000:.0f} ms
ETA         : {(eta - eta%60)/60:2.0f}m:{eta%60:2.0f}s

processing image: {img_filename}  {i+1}/{count}""")
    img_path = os.path.join(INPUT_DIR, img_filename)
    process(img_filename, img_path, model, JSON_OUTPUT_DIR)
    sys.stdout.write(f"\033[{STATUS_LINES}A")  # move cursor up over the status block
    sys.stdout.write("\033[J")   # clear from cursor to end of screen