In [8]:

import argparse
import time
import numpy as np
np.set_printoptions(suppress=True)
import sys
sys.path.append("../")
from models import *
from utils.datasets import *
from utils.utils import *
import pandas as pd
from copy import copy

In [8]:

# Record whether CUDA is present, but pin all inference to the CPU:
# the CUDA device is deliberately not selected here.
cuda = torch.cuda.is_available()
device = torch.device('cpu')

## Functions to crop the card from the image and correct its orientation

In [9]:
# Threshold handed to non_max_suppression_test (as its third argument) in the detection cell.
area_threshold = 0.001
# Ask PyTorch to release its cached CUDA memory before the model is loaded.
torch.cuda.empty_cache()

def get_distances(points):
    """Estimate the width and height of a card from its four corner points.

    Args:
        points: Array-like of four corner points (``points[0]`` .. ``points[3]``),
            ordered around the quadrilateral. Lists/tuples are accepted as well
            as NumPy arrays.

    Returns:
        tuple: ``(card_width, card_height)``. The width is the longer of the
        edges 0-1 and 3-2; the height is the longer of the edges 1-2 and 0-3.
    """
    # Coerce once so that plain Python sequences support element-wise subtraction.
    corners = np.asarray(points)

    # Opposite edges of a skewed quadrilateral differ in length; keep the longer one.
    card_width = max(
        np.linalg.norm(corners[0] - corners[1]),
        np.linalg.norm(corners[3] - corners[2]),
    )
    card_height = max(
        np.linalg.norm(corners[1] - corners[2]),
        np.linalg.norm(corners[0] - corners[3]),
    )

    return card_width, card_height



def camscanner(points,shape_wh,img):
    """Warp ``img`` so that the card fills an axis-aligned rectangle.

    Corners 0, 1 and 3 of ``points`` are mapped onto the top-left, top-right
    and bottom-left corners of a ``shape_wh`` rectangle with an affine
    transform. Corner 2 does not take part in the fit.

    Args:
        points: Corner array that supports NumPy fancy indexing
            (``points[[0, 1, 3]]``).
        shape_wh: ``(card_width, card_height)`` of the output rectangle.
        img: Image handed to ``cv2.warpAffine``.

    Returns:
        The warped image, with output size ``(int(card_width), int(card_height))``.
    """
    width, height = shape_wh

    # Source triangle (three card corners) and the destination triangle it maps to.
    src_corners = np.asarray(points[[0, 1, 3]], dtype=np.float32)
    dst_corners = np.array([[0, 0], [width, 0], [0, height]], dtype=np.float32)

    affine = cv2.getAffineTransform(src_corners, dst_corners)
    return cv2.warpAffine(img, affine, (int(width), int(height)))

In [77]:
class params:
    """Options for the detection cells of this notebook.

    Every option is stored as an instance attribute. The defaults below can be
    overridden per instance by keyword, e.g. ``params(conf_thres=0.3)``.

    Raises:
        TypeError: if a keyword does not name one of the known options.
    """

    def __init__(self, **overrides) -> None:
        # Image source handed to load_images_test.
        self.image_folder = '../trainable_dataset_files/test.part'
        # Directory that receives the annotated images and per-image .txt files.
        self.output_folder = '../output/'
        # Draw the detected quadrilaterals on the saved image.
        self.plot_flag = True
        # Write per-image .txt files and rows of output_fields.csv.
        self.txt_out = True
        # Model definition passed to Darknet.
        self.cfg = '../cfg/yolov3.cfg'
        # Checkpoint to load ('.weights' = darknet format, otherwise PyTorch).
        self.weights_path = '../weights/latest.pt'
        # Minimum confidence for a prediction to be kept.
        self.conf_thres = 0.1
        # Threshold passed to non_max_suppression_test.
        self.nms_thres = 0.0
        # Network input size (608).
        self.img_size = 32 * 19
        # File with one class label per line, read by load_classes.
        self.class_path = '../data/icdar.names'

        # Reject misspelled option names instead of silently adding new attributes.
        unknown = sorted(set(overrides) - set(vars(self)))
        if unknown:
            raise TypeError('unknown option(s): %s' % ', '.join(unknown))
        vars(self).update(overrides)

In [78]:
# Options object read by the detection cells below.
opt = params()

In [79]:
# Template for one row of output_fields.csv: the key order is the CSV column
# order, and every field starts out as an empty string.
out_fields = dict.fromkeys(
    [
        "file_name",
        "num_card",
        "P1_x", "P1_y",
        "P2_x", "P2_y",
        "P3_x", "P3_y",
        "P4_x", "P4_y",
        "cls_pred",
        "conf",
        "cls_conf",
    ],
    "",
)

## Generate `output_fields.csv` with all the fields needed to evaluate the model

In [80]:
# Start the results CSV from scratch: write only the header row, so the
# detection loop can append one row per detected card afterwards.
txt_out_filename = "output_fields.csv"
if opt.txt_out:
    df = pd.DataFrame(columns=list(out_fields))
    df.to_csv(txt_out_filename, mode="w", index=False)

## Plot the predicted corner coordinates on the card and save the images

In [None]:
# Make sure the output directory exists (existing contents are kept).
os.makedirs(opt.output_folder, exist_ok=True)

# Set to an image path to process only that one file while debugging;
# leave as None to run over every image yielded by the data loader.
debug_only_image = None


def _to_image_coord(value, pad, unpadded, extent):
    """Map one network-space coordinate to image space.

    Removes half of the padding, rescales by ``extent / unpadded``, rounds to
    the nearest pixel and clamps the result at 0.
    """
    return max((((value - pad // 2) / unpadded) * extent).round().item(), 0)


# Load model
model = Darknet(opt.cfg, opt.img_size)

weights_path = opt.weights_path
if weights_path.endswith('.weights'):  # saved in darknet format
    load_weights(model, weights_path)
else:  # endswith('.pt'), saved in pytorch format
    # NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
    checkpoint = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    del checkpoint

model.to(device).eval()

# Set Dataloader
classes = load_classes(opt.class_path)  # Extracts class labels from file
dataloader = load_images_test(opt.image_folder, batch_size=1, img_size=opt.img_size)
print(classes)

# Bounding-box colors
color_list = [[random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)] for _ in range(len(classes))]

prev_time = time.time()

for batch_i, (img_paths, img, area) in enumerate(dataloader):
    # batch_size is 1, so the first entry is the only image of the batch.
    # Assigned up front because every branch below needs the path.
    path = img_paths[0]

    if debug_only_image is not None and path != debug_only_image:
        continue
    print(batch_i, img.shape)

    # Get detections
    with torch.no_grad():
        chip = torch.from_numpy(img).unsqueeze(0).to(device)
        pred = model(chip)
        print(pred[:, :, 8].shape, pred.shape)
        # Column 8 holds the confidence; keep only predictions above the threshold.
        pred = pred[pred[:, :, 8] > opt.conf_thres]
        detections = [None]
        if len(pred) > 0:
            detections = non_max_suppression_test(pred.unsqueeze(0), area, area_threshold, opt.conf_thres, opt.nms_thres)

    print('Batch %d... (Done %.3f s)' % (batch_i, time.time() - prev_time))
    prev_time = time.time()

    if len(detections) < 1:
        # Nothing at all came back for this image: record it with zero cards.
        if opt.txt_out:
            empty_row = dict(out_fields, file_name=path, num_card=0)
            pd.DataFrame(empty_row, index=[0]).to_csv(txt_out_filename, mode="a", index=False, header=False)
        continue

    detections = detections[0]
    if detections is None:
        # No card survived thresholding / NMS. No CSV row is written here: the
        # evaluation cells treat "has a row" as "was detected".
        continue

    print("image %g: '%s'" % (batch_i, path))
    results_img_path = os.path.join(opt.output_folder, path.split('/')[-1])

    # Channel-first [0, 1] array -> channel-last 0..255 values (rounded through
    # uint8), stored as a contiguous float32 buffer for drawing.
    img = np.uint8(img.transpose(1, 2, 0) * 255)
    img = np.ascontiguousarray(img, dtype=np.float32)

    # The amount of padding that was added
    pad_x = max(img.shape[0] - img.shape[1], 0) * (opt.img_size / max(img.shape))
    pad_y = max(img.shape[1] - img.shape[0], 0) * (opt.img_size / max(img.shape))
    # Image height and width after padding is removed
    unpad_h = opt.img_size - pad_y
    unpad_w = opt.img_size - pad_x

    # One colour per class present in this image.
    unique_classes = detections[:, -1].cpu().unique()
    bbox_colors = random.sample(color_list, len(unique_classes))
    color_by_class = {int(cls): color for cls, color in zip(unique_classes, bbox_colors)}

    # Start the per-image text file afresh.
    results_txt_path = results_img_path + '.txt'
    if os.path.isfile(results_txt_path):
        os.remove(results_txt_path)

    for i in unique_classes:
        n = (detections[:, -1].cpu() == i).sum()
        print('%g %ss' % (n, classes[int(i)]))

    for num_card, (P1_x, P1_y, P2_x, P2_y, P3_x, P3_y, P4_x, P4_y, conf, cls_conf, cls_pred) in enumerate(detections):
        # Bring the four corners back from the padded network frame to image pixels.
        P1_x = _to_image_coord(P1_x, pad_x, unpad_w, img.shape[1])
        P1_y = _to_image_coord(P1_y, pad_y, unpad_h, img.shape[0])
        P2_x = _to_image_coord(P2_x, pad_x, unpad_w, img.shape[1])
        P2_y = _to_image_coord(P2_y, pad_y, unpad_h, img.shape[0])
        P3_x = _to_image_coord(P3_x, pad_x, unpad_w, img.shape[1])
        P3_y = _to_image_coord(P3_y, pad_y, unpad_h, img.shape[0])
        P4_x = _to_image_coord(P4_x, pad_x, unpad_w, img.shape[1])
        P4_y = _to_image_coord(P4_y, pad_y, unpad_h, img.shape[0])

        # write to file
        if opt.txt_out:
            with open(results_txt_path, 'a') as file:
                file.write(('%g %g %g %g %g %g %g %g %g %g \n') % (P1_x, P1_y, P2_x, P2_y, P3_x, P3_y, P4_x, P4_y, cls_pred, cls_conf * conf))

            # dict(template, **values) keeps the template's key order, i.e. the CSV column order.
            csv_row = dict(
                out_fields,
                file_name=path,
                num_card=num_card + 1,
                P1_x=P1_x, P1_y=P1_y,
                P2_x=P2_x, P2_y=P2_y,
                P3_x=P3_x, P3_y=P3_y,
                P4_x=P4_x, P4_y=P4_y,
                cls_pred=cls_pred.int().item(),
                conf=conf.float().item(),
                cls_conf=cls_conf.float().item(),
            )
            pd.DataFrame(csv_row, index=[0]).to_csv(txt_out_filename, mode="a", index=False, header=False)

        if opt.plot_flag:
            # Draw the quadrilateral on the image.
            # To also save a rectified crop, reshape the eight coordinates to a 4x2 array
            # and pass it through get_distances() / camscanner().
            color = color_by_class[int(cls_pred)]
            plot_one_box([P1_x, P1_y, P2_x, P2_y, P3_x, P3_y, P4_x, P4_y], img, label=None, color=color, conf=conf)

    print("saving_file")
    # Save generated image with detections (always as .jpg; channel order is reversed for OpenCV).
    save_path = results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg')
    saved = cv2.imwrite(save_path, img[:, :, ::-1])
    print(img.shape, save_path, "\n",
          "IS FILE SAVED:- ", saved)

torch.cuda.empty_cache()

## Model evaluation

In [7]:
import pandas as pd

In [8]:
# Load the per-card detection rows written to output_fields.csv by the detection cell.
df = pd.read_csv("./output_fields.csv")

In [9]:
# Show the loaded detections (pandas elides the middle rows of a long frame).
df

Unnamed: 0,file_name,num_card,P1_x,P1_y,P2_x,P2_y,P3_x,P3_y,P4_x,P4_y,cls_pred,conf,cls_conf
0,./midv_dataset/29_irn_drvlic/images/HA/HA29_05...,1,48.0,248.0,512.0,183.0,582.0,360.0,103.0,419.0,0,0.414891,1.0
1,./midv_dataset/01_alb_id/images/KA/KA01_12.tif,1,33.0,218.0,524.0,183.0,560.0,354.0,68.0,392.0,0,0.692086,1.0
2,./midv_dataset/47_usa_bordercrossing/images/KA...,1,66.0,311.0,526.0,290.0,552.0,441.0,79.0,467.0,0,0.901315,1.0
3,./midv_dataset/41_srb_passport/images/KS/KS41_...,1,43.0,182.0,568.0,187.0,557.0,391.0,24.0,389.0,0,0.791177,1.0
4,./midv_dataset/14_deu_id_new/images/CS/CS14_15...,1,60.0,270.0,496.0,273.0,516.0,431.0,56.0,435.0,0,0.930499,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1430,./midv_dataset/29_irn_drvlic/images/KS/KS29_22...,1,64.0,215.0,566.0,223.0,561.0,415.0,48.0,400.0,0,0.302790,1.0
1431,./midv_dataset/31_jpn_drvlic/images/HA/HA31_18...,1,71.0,299.0,459.0,269.0,462.0,391.0,116.0,404.0,0,0.136218,1.0
1432,./midv_dataset/45_ukr_passport/images/KS/KS45_...,1,25.0,182.0,563.0,165.0,563.0,406.0,32.0,411.0,0,0.297593,1.0
1433,./midv_dataset/03_aut_id_old/images/CA/CA03_18...,1,37.0,286.0,394.0,192.0,493.0,315.0,135.0,408.0,0,0.213717,1.0


In [11]:
import re
from shapely.geometry import Polygon
import json

In [None]:
# Side length of the square frame the CSV corner coordinates are divided by
# before being scaled to the real image size (32 * 19, the detection img_size).
NET_INPUT_SIZE = 608

# CSV column pairs holding the four predicted corners, in polygon order.
CORNER_COLUMNS = [("P1_x", "P1_y"), ("P2_x", "P2_y"), ("P3_x", "P3_y"), ("P4_x", "P4_y")]

# List to store all IOU values along with their corresponding file paths
all_iou = []

# Iterate through each row in the DataFrame
for _, row in df.iterrows():

    # Prefix the stored name with "." to get the path relative to this notebook
    each_filepath = "." + row.file_name

    # Only evaluate files named like ".../XX/XXdd_dd.tif"; skip everything else
    if re.search(r"\/\w{2}\/\w{2}\d{2}\_\d{2}\.tif$", each_filepath) is None:
        continue

    # Rows without a detected card carry no corner coordinates; there is nothing to score
    if row.num_card < 1 or any(pd.isna(row[col]) for pair in CORNER_COLUMNS for col in pair):
        continue

    # Read the image using OpenCV (only its size is needed).
    # cv2.imread returns None instead of raising when the file cannot be read.
    img = cv2.imread(each_filepath)
    if img is None:
        raise FileNotFoundError("Could not read image: %s" % each_filepath)
    img_h, img_w = img.shape[:2]

    # Ground truth lives next to the image: same name with '.json', under
    # '/ground_truth/' instead of '/images/'.
    json_path = re.sub(r"\.tif$", ".json", each_filepath)
    json_path = json_path.replace("/images/", "/ground_truth/")

    # Create a Polygon object representing the predicted region, rescaled from
    # the NET_INPUT_SIZE frame to the real image size
    pred_poly = Polygon([
        [(row[x_col] / NET_INPUT_SIZE) * img_w, (row[y_col] / NET_INPUT_SIZE) * img_h]
        for x_col, y_col in CORNER_COLUMNS
    ])

    # Read the ground truth points from the JSON file
    with open(json_path, "r") as f:
        quad_json = json.load(f)
    quad_points = np.array(quad_json["quad"])

    # Create a Polygon object representing the ground truth region
    ground_truth_poly = Polygon(quad_points)

    # Calculate intersection and union areas to compute Intersection over Union (IOU)
    inter_area = ground_truth_poly.intersection(pred_poly).area
    union_area = ground_truth_poly.union(pred_poly).area
    # Two degenerate (zero-area) polygons have no overlap to speak of.
    iou = inter_area / union_area if union_area > 0 else 0.0

    # Append the file path and IOU value to the list
    all_iou.append([each_filepath, iou])

In [50]:
# Collect the (file path, IoU) pairs into a DataFrame and preview the first rows.
iou_df = pd.DataFrame(all_iou, columns=["file_name", "iou"])
iou_df.head()

Unnamed: 0,file_name,iou
0,../midv_dataset/29_irn_drvlic/images/HA/HA29_0...,0.929793
1,../midv_dataset/01_alb_id/images/KA/KA01_12.tif,0.967739
2,../midv_dataset/47_usa_bordercrossing/images/K...,0.931991
3,../midv_dataset/41_srb_passport/images/KS/KS41...,0.947237
4,../midv_dataset/14_deu_id_new/images/CS/CS14_1...,0.945262


## Average IoU over all test-set images that have a detection

In [51]:
# Mean IoU over every scored image.
iou_df["iou"].mean()

0.9478585441640888

In [1]:
# Read the list of test images, one entry per line. Blank lines are dropped so
# they are not counted as images when the list length is used as a denominator.
with open("../trainable_dataset_files/test.part", "r") as f:
    test_set = [line for line in f if line.strip()]

In [5]:
# Number of entries in the test list; the denominator of the detection rate.
num_of_images_in_test_set  = len(test_set)

## Detection accuracy: an image counts as detected if it has a row in the CSV file, since a row is only written when an object is detected

In [14]:
# Fraction of test images with at least one detected card. Distinct files with
# num_card > 0 are counted rather than CSV rows, so an image with several cards
# (several rows) or a zero-card row cannot distort the ratio.
detected_images = df.loc[df["num_card"] > 0, "file_name"].nunique()
detected_images / num_of_images_in_test_set

0.9768550034036759