In [1]:
from pathlib import Path
from course_intro_ocr_t1.data import MidvPackage
from tqdm import tqdm
import numpy as np
import cv2
import json

In [2]:
# The compressed MIDV-500 dataset is looked up two directory levels above the
# current working directory, under midv500/midv500_compressed.
DATASET_PATH = Path.cwd().parent.parent.joinpath('midv500', 'midv500_compressed')
# Stop immediately, showing the resolved path, if the dataset is not there.
assert DATASET_PATH.exists(), DATASET_PATH.absolute()

# Read every package, then display the package count and the element type.
data_packs = MidvPackage.read_midv500_dataset(DATASET_PATH)
(len(data_packs), type(data_packs[0]))

(50, course_intro_ocr_t1.data.MidvPackage)

In [3]:
import cv2
import numpy as np
from tqdm import tqdm

def process_image(image):
    """Convert an image to grayscale and extract its SIFT features.

    Args:
        image: Image object that ``np.array`` can convert to a pixel array.
            NOTE(review): the conversion code is ``cv2.COLOR_BGR2GRAY``, so the
            channels are interpreted as BGR; if the loader actually yields RGB
            the red/blue grayscale weights are swapped -- confirm.

    Returns:
        tuple: ``(gray_image, keypoints, descriptors)`` -- the grayscale array
        followed by the two values returned by ``detectAndCompute`` on a newly
        created SIFT detector.
    """
    pixels = np.array(image)
    gray_image = cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)
    # ``None`` is passed as the mask argument of detectAndCompute.
    keypoints, descriptors = cv2.SIFT_create().detectAndCompute(gray_image, None)
    return gray_image, keypoints, descriptors

def find_homography(template_keypoints, target_keypoints, matches, reproj_thresh=5.0):
    """Estimate the template-to-target homography from matched keypoints.

    Args:
        template_keypoints: Sequence indexed by ``match.queryIdx``; every
            element must expose a ``.pt`` coordinate pair.
        target_keypoints: Sequence indexed by ``match.trainIdx``; every
            element must expose a ``.pt`` coordinate pair.
        matches: Iterable of match objects carrying ``queryIdx`` and
            ``trainIdx`` attributes.
        reproj_thresh: Reprojection threshold forwarded to
            ``cv2.findHomography`` together with the ``cv2.RANSAC`` method.

    Returns:
        tuple: ``(homography_matrix, mask)`` as returned by
        ``cv2.findHomography``, or ``(None, None)`` when fewer than four
        matches are supplied. Callers must therefore check the matrix for
        ``None`` before using it.
    """
    # Materialise once so generators are accepted and can be counted.
    matches = list(matches)

    # A homography has 8 degrees of freedom, i.e. it needs at least four point
    # correspondences. Bail out explicitly instead of letting OpenCV fail with
    # an opaque error on too few (or zero) points.
    min_correspondences = 4
    if len(matches) < min_correspondences:
        return None, None

    # Shape (N, 1, 2), float32.
    src_points = np.float32([template_keypoints[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst_points = np.float32([target_keypoints[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
    homography_matrix, mask = cv2.findHomography(src_points, dst_points, cv2.RANSAC, reproj_thresh)
    return homography_matrix, mask

def transform_corners(template_gray, homography_matrix, target_gray):
    """Project the template's four corners into the target image.

    Args:
        template_gray: 2-D array; its ``shape`` gives the template height and
            width.
        homography_matrix: Matrix handed to ``cv2.perspectiveTransform``.
        target_gray: 2-D array; its ``shape`` gives the height and width used
            for normalisation.

    Returns:
        list: Four ``[x, y]`` pairs, each divided by the target width and
        height respectively, in the order (0, 0), (w, 0), (w, h), (0, h) of
        the template.
    """
    template_h, template_w = template_gray.shape
    target_h, target_w = target_gray.shape

    template_quad = np.float32([
        [0, 0],
        [template_w, 0],
        [template_w, template_h],
        [0, template_h],
    ])
    # A leading axis is added for the call, (4, 2) -> (1, 4, 2), and stripped
    # again from the result.
    projected = cv2.perspectiveTransform(template_quad[np.newaxis], homography_matrix)
    normalized_corners = projected[0] / [target_w, target_h]
    return normalized_corners.tolist()

def filter_matches(matches, ratio=0.75):
    """Keep the matches that pass the nearest/second-nearest ratio test.

    Args:
        matches: Iterable of neighbour sequences, nearest first, as produced
            by a k-nearest-neighbour matcher with ``k >= 2``. Each neighbour
            must expose a ``distance`` attribute.
        ratio: A best match is kept only if its distance is strictly smaller
            than ``ratio`` times the distance of the second-best match.

    Returns:
        list: The best match of every entry that passes the test, in input
        order.
    """
    good_matches = []
    for candidates in matches:
        # A k-NN matcher may return fewer than k neighbours for a query (for
        # instance when the other image has fewer than k descriptors). Such
        # entries cannot be ratio-tested, so skip them rather than crash on
        # tuple unpacking.
        if len(candidates) < 2:
            continue
        best, runner_up = candidates[0], candidates[1]
        if best.distance < ratio * runner_up.distance:
            good_matches.append(best)
    return good_matches

# unique_key -> normalised corner list for every successfully processed test item.
results = {}
# unique_key -> repr of the exception for every test item that could not be
# processed. Such items are left out of ``results`` (best effort), but the
# reason is kept instead of being silently discarded.
failed_items = {}

# The matcher is created once and reused for every template/target pair.
bf_matcher = cv2.BFMatcher()

for package in tqdm(data_packs):
    # The template is the same for every item of a package, so its features
    # are computed at most once per package. This is done lazily, inside the
    # ``try``, so a package without test items does no work and a template
    # failure is still recorded against the item being processed.
    template_features = None
    for item in package:
        if not item.is_test_split():
            continue
        try:
            if template_features is None:
                template_features = process_image(package.template_item.image)
            template_gray, template_keypoints, template_descriptors = template_features
            target_gray, target_keypoints, target_descriptors = process_image(item.image)

            raw_matches = bf_matcher.knnMatch(template_descriptors, target_descriptors, k=2)
            matches = filter_matches(raw_matches)

            homography_matrix, _ = find_homography(template_keypoints, target_keypoints, matches)
            if homography_matrix is None:
                # Estimation can fail without raising; report it explicitly.
                raise ValueError('homography could not be estimated')
            results[item.unique_key] = transform_corners(template_gray, homography_matrix, target_gray)
        except Exception as exc:
            # Keep going with the remaining items, but remember what failed.
            failed_items[item.unique_key] = repr(exc)

if failed_items:
    print(f'{len(failed_items)} test item(s) produced no prediction; see failed_items for details')

100%|██████████| 50/50 [08:19<00:00,  9.99s/it]


In [4]:
from course_intro_ocr_t1.metrics import dump_results_dict, measure_crop_accuracy

# Write the predictions to pred.json, then score that file against gt.json
# (presumably the ground-truth annotations -- confirm) and report the result.
pred_path = Path() / 'pred.json'
gt_path = Path() / 'gt.json'

dump_results_dict(results, pred_path)
accuracy = measure_crop_accuracy(pred_path, gt_path)
print(f'Crop accuracy: {accuracy:.4f}')

Crop accuracy: 0.9640
