In [None]:
# Install the Ultralytics package; the YOLO class used for detection later in this notebook is imported from it.
!pip install ultralytics

In [None]:
import zipfile
import os

# Unpack the zipped YOLO model files into a local working directory.
yolo_model_dir = '/content/yolo_model'
os.makedirs(yolo_model_dir, exist_ok=True)

with zipfile.ZipFile(
    '/content/drive/MyDrive/Models/words-yolov5/words-yolo.zip', mode='r'
) as model_archive:
    model_archive.extractall(path=yolo_model_dir)

In [None]:
import json
import shutil

# Local directories that receive the unpacked detection datasets.
SOURCE2_DIR = '/content/SOURCE2/'
SOURCE3_DIR = '/content/SOURCE3/'

# Create both target directories first, then unpack each archive into its own
# directory (SOURCE2 before SOURCE3).
for dataset_dir in (SOURCE2_DIR, SOURCE3_DIR):
    os.makedirs(dataset_dir, exist_ok=True)

for archive_path, dataset_dir in (
    ('/content/drive/MyDrive/Datasets/SOURCE2/detection.zip', SOURCE2_DIR),
    ('/content/drive/MyDrive/Datasets/SOURCE3/detection.zip', SOURCE3_DIR),
):
    with zipfile.ZipFile(archive_path, mode='r') as dataset_archive:
        dataset_archive.extractall(path=dataset_dir)

In [None]:
import numpy as np
import random

def _list_test_pages(dataset_dir):
    """Return the full paths of every entry in ``<dataset_dir>/test``, sorted by name.

    ``os.listdir`` returns entries in arbitrary order, so the listing is sorted
    to make the result identical from run to run.
    """
    test_dir = os.path.join(dataset_dir, 'test')
    return [os.path.join(test_dir, name) for name in sorted(os.listdir(test_dir))]


source3_test_pages = _list_test_pages(SOURCE3_DIR)

source3_test_annotations = os.path.join(SOURCE3_DIR, 'test_annotations.json')

source2_test_pages = _list_test_pages(SOURCE2_DIR)

source2_test_annotations = os.path.join(SOURCE2_DIR, 'test_annotations.json')

# Mix the pages of both datasets. A dedicated, seeded RNG keeps the shuffled
# order reproducible across kernel restarts without touching the global
# ``random`` state.
test_pages = source3_test_pages + source2_test_pages
random.Random(42).shuffle(test_pages)

In [None]:
def get_source2_transcriptions(img_id, json_path):
    """Return the transcription of one SOURCE2 page as a single string.

    Args:
        img_id: key of the page in the annotations file.
        json_path: path to a JSON file mapping page ids to objects that hold a
            ``"words"`` list, each word carrying a ``"transcription"`` string.

    Returns:
        The word transcriptions of the page joined by single spaces, in file
        order (an empty string when the page has no words).

    Raises:
        KeyError: if ``img_id`` is not present in the annotations file.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding, which would garble non-ASCII transcriptions on some systems.
    with open(json_path, 'r', encoding='utf-8') as f:
        contents = json.load(f)

    words = contents[img_id]["words"]

    return " ".join(word['transcription'] for word in words)

def get_source3_transcriptions(img_id, json_path):
    """Return the transcription of one SOURCE3 page as a single string.

    Args:
        img_id: key of the page in the annotations file.
        json_path: path to a JSON file mapping page ids to objects that hold a
            ``"regions_contents"`` list, each region carrying a
            ``"transcriptions"`` list of strings.

    Returns:
        All transcriptions of all regions, in file order, joined by single
        spaces (an empty string when the page has no regions).

    Raises:
        KeyError: if ``img_id`` is not present in the annotations file.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding, which would garble non-ASCII transcriptions on some systems.
    with open(json_path, 'r', encoding='utf-8') as f:
        contents = json.load(f)

    regions_contents = contents[img_id]["regions_contents"]

    # Flatten the per-region lists while preserving region and word order.
    transcriptions = [
        text
        for region in regions_contents
        for text in region['transcriptions']
    ]

    return " ".join(transcriptions)

def parse_transcriptions(pages, source2_annotations, source3_annotations, annotations):
    """Fill ``annotations`` with the ground-truth transcription of every page.

    Args:
        pages: iterable of page image paths. A page is attributed to a dataset
            by checking whether ``SOURCE2_DIR`` or ``SOURCE3_DIR`` occurs in
            its path; pages under neither directory are skipped.
        source2_annotations: path to the SOURCE2 annotations JSON file.
        source3_annotations: path to the SOURCE3 annotations JSON file.
        annotations: dict updated in place; each page id (file name without
            extension) is mapped to ``{"transcriptions": <str>}``.

    Returns:
        None. The result is written into ``annotations``.

    Raises:
        ValueError: if a page id is already present in ``annotations``.
    """
    for page_path in pages:
        # splitext strips the extension whatever its length (".jpg", ".jpeg",
        # ".png", ...), unlike slicing off a fixed four characters.
        page_id = os.path.splitext(os.path.basename(page_path))[0]

        if SOURCE2_DIR in page_path:
            transcriptions = get_source2_transcriptions(page_id, source2_annotations)
        elif SOURCE3_DIR in page_path:
            transcriptions = get_source3_transcriptions(page_id, source3_annotations)
        else:
            # Not part of either dataset: nothing to look up.
            continue

        # Direct dict membership test; no need to copy the keys into a list.
        if page_id in annotations:
            raise ValueError(f"Page with id: {page_id} is already in annotations")

        annotations[page_id] = {"transcriptions": transcriptions}

In [None]:
# Ground-truth transcriptions of the test pages, keyed by page id; filled in
# place by parse_transcriptions.
test_annotations = dict()
parse_transcriptions(
    pages=test_pages,
    source2_annotations=source2_test_annotations,
    source3_annotations=source3_test_annotations,
    annotations=test_annotations,
)

In [None]:
def group_boxes_by_row(boxes, y_margin_ratio=0.4, residual_y_margin_ratio=0.2):
    """Order detected boxes row by row (top to bottom), left to right in each row.

    Boxes are visited in order of increasing vertical centre. A box joins the
    current row unless its centre lies more than ``y_margin_ratio`` times the
    average box height below the mean centre of the boxes already in that row;
    in that case it starts a new row. Coordinates are truncated to integers
    before any comparison.

    Args:
        boxes: iterable of detections; each item must expose ``xyxy[0]`` as an
            ``(x1, y1, x2, y2)`` sequence obtainable through ``.cpu().numpy()``.
        y_margin_ratio: fraction of the average box height a box centre may
            fall below the running row centre and still belong to that row.
        residual_y_margin_ratio: currently unused; kept so that existing calls
            passing it continue to work.

    Returns:
        A flat list containing the input boxes in reading order. An empty input
        yields an empty list.
    """
    # Convert every box's coordinates exactly once up front; each entry is
    # (box, left x, vertical centre, height).
    measured = []
    for box in boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
        measured.append((box, x1, (y1 + y2) / 2, y2 - y1))

    if not measured:
        return []

    avg_box_height = sum(height for _, _, _, height in measured) / len(measured)
    max_row_offset = avg_box_height * y_margin_ratio

    # Stable sort by vertical centre, so ties keep their input order.
    measured.sort(key=lambda item: item[2])

    rows = []
    current_row = []
    mid_y_sum = 0.0  # sum of the vertical centres of the boxes in current_row

    for item in measured:
        mid_y = item[2]
        if current_row and mid_y <= mid_y_sum / len(current_row) + max_row_offset:
            current_row.append(item)
            mid_y_sum += mid_y
        else:
            # First box overall, or too far below the running row centre.
            if current_row:
                rows.append(current_row)
            current_row = [item]
            mid_y_sum = mid_y

    rows.append(current_row)

    # Left-to-right inside each row (stable on equal x), then flatten.
    ordered_boxes = []
    for row in rows:
        row.sort(key=lambda item: item[1])
        ordered_boxes.extend(item[0] for item in row)

    return ordered_boxes


In [None]:
from ultralytics import YOLO
from PIL import Image, ImageDraw, ImageFont
import os
import numpy as np
import shutil
import time

# Font and colour used to draw the box outlines and their order numbers.
font_path = "/content/drive/MyDrive/etc/Roboto-Bold.ttf"
font_size = 24
font = ImageFont.truetype(font=font_path, size=font_size)

color = (128, 0, 128)

# Load the detector from the extracted model files and point it at the
# accompanying config file.
yolo_model = YOLO('/content/yolo_model/best.pt')
yolo_model.overrides['data'] = '/content/yolo_model/yolo_config.yaml'

# Directory that receives the annotated page images.
eval_dir = "/content/eval_results/"
os.makedirs(name=eval_dir, exist_ok=True)

# File names of the hand-picked test pages to run through the detector.
picked_pages = [
    "0053-2.jpg", "0179-1.jpg", "0290-1.jpg", "0520-2.jpg", "d06-037.jpg",
    "f07-021a.jpg", "g02-059.jpg", "m02-087.jpg", "p06-030.jpg", "0161-1.jpg",
    "0191-1.jpg", "0313-1.jpg", "1133-2.jpg", "d06-100.jpg", "f07-069.jpg",
    "g04-068.jpg", "m02-112.jpg", "p06-042.jpg", "0163-4.jpg", "0203-2.jpg",
    "0390-2.jpg", "d01-049.jpg", "e06-000.jpg", "f07-081b.jpg", "m01-125.jpg",
    "n01-031.jpg", "0173-2.jpg", "0239-1.jpg", "0425-4.jpg", "d01-104.jpg",
    "e06-010.jpg", "f07-084a.jpg", "m01-136.jpg", "n04-139.jpg", "0176-4.jpg",
    "0242-1.jpg", "0479-4.jpg", "d04-012.jpg", "e06-049.jpg", "f07-092a.jpg",
    "m02-055.jpg", "n04-213.jpg", "0178-1.jpg", "0247-2.jpg", "0510-2.jpg",
    "d04-028.jpg", "f04-083.jpg", "g01-004.jpg", "m02-059.jpg", "p02-139.jpg",
]

# Resolve the hand-picked file names to full paths, keeping the order of
# ``test_pages``. Matching on the exact base name (rather than on a substring
# of the whole path) prevents e.g. "0053-2.jpg" from also selecting
# "10053-2.jpg", and guarantees that each path is added at most once.
picked_page_names = set(picked_pages)

picked_pages_paths = [
    page for page in test_pages
    if os.path.basename(page) in picked_page_names
]

# Detect words on every picked page, number the boxes in reading order and
# save the annotated page under eval_dir with its original file name.
for img_path in picked_pages_paths:
    page_name = os.path.basename(img_path)

    # Grayscale round trip: drop the colour information but keep three channels.
    img = Image.open(img_path).convert('L').convert('RGB')
    img_np = np.array(img)

    predictions = yolo_model.predict(
        source=img_np,
        imgsz=946,
        device='cuda:0',
        max_det=300,
        iou=0.5,
    )

    print(page_name)
    # A single image was passed in, so only the first result is used.
    results = predictions[0]
    sorted_boxes = group_boxes_by_row(results.boxes)

    draw = ImageDraw.Draw(img)

    for order_number, box in enumerate(sorted_boxes, start=1):
        x1, y1, x2, y2 = (int(value) for value in box.xyxy[0].cpu().numpy())
        draw.rectangle([x1, y1, x2, y2], outline=color, width=3)
        # Label each box with its 1-based position in the reading order.
        draw.text((x1, y1), str(order_number), fill=color, font=font)

    annotated_image_path = os.path.join(eval_dir, page_name)
    img.save(annotated_image_path)



In [None]:
import shutil
# Bundle every annotated page into /content/eval_results.zip.
shutil.make_archive(
    base_name='/content/eval_results',
    format='zip',
    root_dir='/content/eval_results',
)