In [12]:
# import some common libraries
import numpy as np
import cv2
import random

import torch, torchvision
print(torch.__version__, torch.cuda.is_available())

# Detectron
import detectron2

# Helper Library
import os
import json
from PIL import Image
import matplotlib
from matplotlib import pyplot as plt

## Training
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer
## Validation
from detectron2.engine import DefaultPredictor

from detectron2 import model_zoo
import time

# Region cropping
def get_predictor():
    cfg_zoo = "faster_rcnn_R_50_C4_1x.yaml"
    cfg_model_path = "./results/faster-rcnn/05.04.2021, 22;28;07/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml/model_final.pth"

    # Initialize predictor
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/" + cfg_zoo))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set threshold for this model
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.MODEL.WEIGHTS = cfg_model_path
    predictor = DefaultPredictor(cfg)
    return predictor

def get_image_bounding_box(image_path, predictor):
    im = cv2.imread(image_path)
    outputs = predictor(im)
    bbox = outputs["instances"].to("cpu").pred_boxes
    return bbox

def get_cropped_imgs_bbox(img_path, bboxes):
    images = []
    bbox_arr = bboxes.tensor.cpu().numpy()
    im = cv2.imread(img_path)
    
    for bbox in bbox_arr:
        x0 = np.around(bbox[0]).astype(int) 
        y0 = np.around(bbox[1]).astype(int)
        x1 = np.around(bbox[2]).astype(int)
        y1 = np.around(bbox[3]).astype(int)
        
        images.append(im[y0:y1,x0:x1])
    
    return images #bgr2gray later
#endregion Cropping

#Region Matching
# Check smaller width dimension of 2 images
# Smaller images: check width x dimension and choose the smaller
# Resize higher image resolution with the smaller dimension (width / height)
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]

    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image

    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the
        # dimensions
        r = height / float(h)
        dim = (int(w * r), height)

    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the
        # dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # resize the image
    resized = cv2.resize(image, dim, interpolation = inter)

    # return the resized image
    return resized

def get_downscaled_images(img1, img2):
    img1_h, img1_w, img1_c = img1.shape
    img2_h, img2_w, img2_c = img2.shape
    
    if img1_h > img2_h:
        scaled_img1 = image_resize(img1, width=img2_w)
        scaled_img2 = img2
    else:
        scaled_img2 = image_resize(img2, width=img1_w)
        scaled_img1 = img1
        
    
    return scaled_img1, scaled_img2

def get_orb_kp_and_des_by_img(img1):
    n_features = 3000
    factor = 2.0
    orb = cv2.ORB_create(n_features, factor)
    img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    
    keypoints_img, descriptor_img =  orb.detectAndCompute(img1_gray, None)
    return keypoints_img, descriptor_img

def get_matches(query_orb_descriptor, train_orb_descriptor):
    orb_bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    return orb_bf.match(query_orb_descriptor,train_orb_descriptor)

# Set treshold to 64.0 based on ORB original paper ratio
def get_good_orb_matches(matches):
    good = []
    for m in matches:
        if m.distance < 64:
            good.append(m)
            
    return good
#endregion Matching


def main():
    start_time = time.time()
    img1_path = './dataset/e-ktp/val/images/78_ktp.jpg' # for test
    img2_path = './dataset/e-ktp/val/images/78_selfie.jpg' # for test
    start_detection_time = time.time()
    predictor = get_predictor()
    img1_bbox = get_image_bounding_box(img1_path, predictor)
    img2_bbox = get_image_bounding_box(img2_path, predictor)
    end_detection_time = (time.time() - start_detection_time)
    start_cropping_time = time.time()
    imgs1_cropped = get_cropped_imgs_bbox(img1_path, img1_bbox)
    imgs2_cropped = get_cropped_imgs_bbox(img2_path, img2_bbox)
    end_cropping_time = (time.time() - start_cropping_time)
    total_detection_time = end_detection_time + end_cropping_time
    
    results = []
    
    for idx_img1, img1_cropped in enumerate(imgs1_cropped):
        for idx_img2, img2_cropped in enumerate(imgs2_cropped):
            each_time = time.time()
            img1_ds, img2_ds = get_downscaled_images(img1_cropped, img2_cropped)
            img1_kp, img1_desc = get_orb_kp_and_des_by_img(img1_ds)
            img2_kp, img2_desc = get_orb_kp_and_des_by_img(img2_ds)
            matches = get_matches(img1_desc, img2_desc)
            good_matches = get_good_orb_matches(matches)
            results.append({
                "compare": "Image 1 Cropped "+str(idx_img1)+" to Image 2 Cropped "+str(idx_img2),
                "img1_keypoints": len(img1_kp),
                "img1_dimension": img1_ds.shape,
                "img2_keypoints": len(img2_kp),
                "img2_dimension": img2_ds.shape,
                "total_matches": len(matches),
                "total_good_matches": len(good_matches),
                "execution_time": (time.time() - each_time)
            })
            
    execution_time = (time.time() - start_time)
    print("=================RESULTS=================")
    print()
    print("====DETECTION USING FASTER-RCNN====")
    print("Total Cropped Image from Image 1: "+str(len(imgs1_cropped)))
    print("Total Cropped Image from Image 2: "+str(len(imgs2_cropped)))
    print("Execution Time: "+str(end_cropping_time))
    print("===================================")
    print()
    print("========MATCHING USING ORB========")
    for result in results:
        print("--"+result['compare'])
        print("Image 1 Total Keypoints: "+str(result['img1_keypoints']))
        print("Image 1 Dimensions: %s" % (result['img1_dimension'],))
        print("Image 2 Total Keypoints: "+str(result['img2_keypoints']))
        print("Image 2 Dimensions: %s" % (result['img2_dimension'],))
        print("Total Matches: "+str(result['total_matches']))
        print("Total \"Good\" Matches: "+str(result['total_good_matches']))
        print("Execution Time: "+str(result['execution_time'])+" seconds")
        print("----------------------------------------------")
    print("==================================")
    print()
    print("Overall Execution Time: "+str(execution_time)+" seconds")
    print("=========================================")

if __name__ == "__main__":
    main()

1.7.1+cu110 True

====DETECTION USING FASTER-RCNN====
Total Cropped Image from Image 1: 2
Total Cropped Image from Image 2: 2
Execution Time: 2.5054636001586914

--Image 1 Cropped 0 to Image 2 Cropped 0
Image 1 Total Keypoints: 2596
Image 1 Dimensions: (572, 1023, 3)
Image 2 Total Keypoints: 686
Image 2 Dimensions: (633, 1023, 3)
Total Matches: 377
Total "Good" Matches: 357
Execution Time: 0.0659644603729248 seconds
----------------------------------------------
--Image 1 Cropped 0 to Image 2 Cropped 1
Image 1 Total Keypoints: 2594
Image 1 Dimensions: (555, 992, 3)
Image 2 Total Keypoints: 945
Image 2 Dimensions: (386, 992, 3)
Total Matches: 414
Total "Good" Matches: 357
Execution Time: 0.06499791145324707 seconds
----------------------------------------------
--Image 1 Cropped 1 to Image 2 Cropped 0
Image 1 Total Keypoints: 2341
Image 1 Dimensions: (1155, 1023, 3)
Image 2 Total Keypoints: 686
Image 2 Dimensions: (633, 1023, 3)
Total Matches: 310
Total "Good" Matches: 272
Execution Tim