In [1]:
import warnings
from pathlib import Path

import cv2
import numpy as np
from tqdm import tqdm

from course_intro_ocr_t1.data import MidvPackage

In [2]:
# The compressed MIDV-500 directory is looked up two levels above the current
# working directory; fail fast (showing the resolved path) when it is missing.
DATASET_PATH = Path().absolute().parent.parent.joinpath('midv500', 'midv500_compressed')
assert DATASET_PATH.exists(), DATASET_PATH.absolute()

# Load the packages through the course helper and report how many were found.
data_packs = MidvPackage.read_midv500_dataset(DATASET_PATH)
print(f"Loaded {len(data_packs)} packages.")

Loaded 50 packages.


In [3]:
class Cropper:
    """Locate a document in a photo by matching ORB features against its template.

    The pipeline is: grayscale conversion -> ORB keypoints/descriptors ->
    brute-force Hamming matching -> RANSAC homography -> projection of the
    template corners into the target image -> normalisation by the target size.

    Attributes:
        orb: The ``cv2.ORB`` detector/descriptor used for both images.
        ransac_reproj_threshold: Reprojection threshold passed to
            ``cv2.findHomography``.
        failures: Mapping ``unique_key -> repr(exception)`` for the frames that
            could not be processed by the most recent ``process_data_packs`` run.
    """

    # A homography is estimated from point pairs; fewer than four cannot define one.
    MIN_MATCHES = 4

    def __init__(self, nfeatures=2000, ransac_reproj_threshold=5.0):
        """Create the ORB detector.

        Args:
            nfeatures: Maximum number of ORB features to retain per image.
            ransac_reproj_threshold: RANSAC reprojection threshold used when
                estimating the homography.
        """
        self.orb = cv2.ORB_create(nfeatures=nfeatures)
        self.ransac_reproj_threshold = ransac_reproj_threshold
        self.failures = {}

    def preprocess_image(self, img):
        """Convert a 3-channel image to a single-channel grayscale image."""
        # NOTE(review): the conversion code assumes BGR channel order. If the
        # arrays built from `item.image` are RGB (e.g. they come from PIL),
        # cv2.COLOR_RGB2GRAY would be the matching code - confirm.
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def detect_compute_keypoints_descriptors(self, img):
        """Return ``(keypoints, descriptors)`` found by ORB in a grayscale image."""
        return self.orb.detectAndCompute(img, None)

    def match_keypoints(self, template_dscs, target_dscs):
        """Match template descriptors to target descriptors.

        Returns:
            Cross-checked brute-force matches, sorted by ascending distance.

        Raises:
            ValueError: If either image produced no descriptors.
        """
        if template_dscs is None or target_dscs is None:
            raise ValueError("no ORB descriptors found in the template or the target image")
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        return sorted(bf.match(template_dscs, target_dscs), key=lambda m: m.distance)

    def find_homography(self, query_pts, train_pts):
        """Estimate the homography mapping ``query_pts`` onto ``train_pts`` with RANSAC.

        Returns whatever ``cv2.findHomography`` returns as its first value,
        which may be ``None`` when no model could be estimated.
        """
        homo, _ = cv2.findHomography(query_pts, train_pts, cv2.RANSAC,
                                     self.ransac_reproj_threshold)
        return homo

    def transform_angles(self, homo, template_img):
        """Project the four template corners through ``homo``.

        Corners are ordered top-left, top-right, bottom-right, bottom-left and
        the result keeps the ``(4, 1, 2)`` layout used by
        ``cv2.perspectiveTransform``.
        """
        height, width = np.shape(template_img)[:2]
        template_angles = np.array(
            [[0, 0], [width, 0], [width, height], [0, height]],
            dtype=np.float32,
        )[:, None]
        return cv2.perspectiveTransform(template_angles, homo)

    def normalize_coordinates(self, angles, target_img):
        """Divide ``(x, y)`` coordinates by the target image ``(width, height)``."""
        height, width = np.shape(target_img)[:2]
        return angles / np.array([width, height])

    def _extract_features(self, img):
        """Return ``(keypoints, descriptors)`` for a colour image."""
        return self.detect_compute_keypoints_descriptors(self.preprocess_image(img))

    def _angles_from_features(self, template_img, template_kpts, template_dscs, target_img):
        """Find the template corners in ``target_img`` using precomputed template features."""
        target_kpts, target_dscs = self._extract_features(target_img)
        matches = self.match_keypoints(template_dscs, target_dscs)
        if len(matches) < self.MIN_MATCHES:
            raise ValueError(
                f"only {len(matches)} matches found; at least {self.MIN_MATCHES} are required"
            )
        query_pts = np.array([template_kpts[m.queryIdx].pt for m in matches],
                             dtype=np.float32)[:, None]
        train_pts = np.array([target_kpts[m.trainIdx].pt for m in matches],
                             dtype=np.float32)[:, None]
        homo = self.find_homography(query_pts, train_pts)
        if homo is None:
            raise ValueError("homography estimation failed")
        transformed_angles = self.transform_angles(homo, template_img)
        return self.normalize_coordinates(transformed_angles, target_img)

    def angles(self, template_img, target_img):
        """Return the template corners located in ``target_img``.

        Args:
            template_img: Colour image of the document template.
            target_img: Colour image in which the document should be found.

        Returns:
            Array of shape ``(4, 1, 2)`` with corner coordinates divided by the
            target image width and height.

        Raises:
            ValueError: If descriptors are missing, too few matches are found,
                or no homography could be estimated.
        """
        template_kpts, template_dscs = self._extract_features(template_img)
        return self._angles_from_features(template_img, template_kpts, template_dscs, target_img)

    def process_data_packs(self, data_packs):
        """Compute corners for every test-split item of every data pack.

        Processing is best-effort: a frame that raises is skipped, but its key
        and the error are recorded in ``self.failures`` and a single summary
        warning is emitted at the end instead of silently dropping it.

        Args:
            data_packs: Iterable of packs supporting ``len()``, integer
                indexing and a ``template_item`` attribute.

        Returns:
            Dict mapping each processed item's ``unique_key`` to its
            normalised corner array.
        """
        results_dict = {}
        self.failures = {}
        attempted = 0
        for dp in tqdm(data_packs):
            # Template features are identical for all frames of a pack, so they
            # are computed once, lazily, on the first test frame.
            template_img = None
            template_features = None
            for i in range(len(dp)):
                item = dp[i]
                if not item.is_test_split():
                    continue
                attempted += 1
                key = item.unique_key
                try:
                    if template_features is None:
                        template_img = np.array(dp.template_item.image)
                        template_features = self._extract_features(template_img)
                    template_kpts, template_dscs = template_features
                    results_dict[key] = self._angles_from_features(
                        template_img, template_kpts, template_dscs, np.array(item.image)
                    )
                except Exception as exc:
                    # Keep only the text: storing the exception object would pin
                    # its traceback (and the images it references) in memory.
                    self.failures[key] = repr(exc)
        if self.failures:
            warnings.warn(
                f"{len(self.failures)} of {attempted} test frames could not be processed; "
                "see Cropper.failures for details",
                RuntimeWarning,
                stacklevel=2,
            )
        return results_dict


In [4]:
# Run the ORB-based cropper over all packs (only test-split items are processed).
cropper = Cropper()
results_dict = cropper.process_data_packs(data_packs)

# Drop the singleton axis of each (4, 1, 2) corner array so every value is (4, 2).
output_dict = {key: np.squeeze(arr) for key, arr in results_dict.items()}

 42%|████▏     | 21/50 [01:32<03:02,  6.31s/it]

## Узнаем точность

In [None]:
from course_intro_ocr_t1.metrics import dump_results_dict, measure_crop_accuracy

# Dump the squeezed (4, 2) corner arrays built in the previous cell rather than
# the raw (4, 1, 2) arrays in `results_dict`; `output_dict` was otherwise unused.
# NOTE(review): assumes the metrics helpers expect one flat quadrilateral per
# key - confirm against course_intro_ocr_t1.metrics.
pred_path = Path() / 'pred.json'
gt_path = Path() / 'gt.json'

dump_results_dict(output_dict, pred_path)
acc = measure_crop_accuracy(pred_path, gt_path)
print("Точность кропа: {:1.4f}".format(acc))