In [1]:
from glob import glob
import os

import cv2
import numpy as np
from PIL import Image
from pdf2image import convert_from_path
from imutils import resize
from tqdm import tqdm
from scoring import *

# Gather the source PDFs of each quality class; glob() already returns a list.
good = glob("../data/original/Good/*.pdf")
ugly = glob("../data/original/Ugly/*.pdf")

# Cell output: number of PDFs found per class as (good, ugly).
(len(good), len(ugly))

(16, 25)

In [2]:
def process(files, output_dir):
    """Annotate the first page of each PDF and save it as a JPEG.

    For every input file the first page is rasterised at 300 dpi, resized
    with ``im_resize`` and handed to ``detect_objects``. The bounding boxes
    of the first two entries of ``sorted_indices`` (``border_1`` and
    ``border_2``) are drawn, then the bounding box of every selected contour
    that straddles the horizontal line at 80% of the image height. The summed
    area of those boxes (excluding the two borders themselves) is turned into
    a score, ``1 - area / (image_area / 3)``, and written onto the image.

    Args:
        files: Iterable of PDF file paths.
        output_dir: Directory that receives the annotated images; it is
            created when it does not exist.

    Returns:
        None. One ``<pdf basename>.jpg`` file is written to ``output_dir``
        per input file.
    """
    os.makedirs(output_dir, exist_ok=True)
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    text_color = (255, 0, 0)
    text_thickness = 2
    box_thickness = 10
    min_area = 10000  # threshold applied to column 5 of hierarchy_area

    # Wrapping the iterable lets tqdm advance and close the bar by itself,
    # even when `files` has no len() or an exception interrupts the loop.
    for f in tqdm(files):
        # Only the first page is used, so only that page is rasterised.
        pages = convert_from_path(f, dpi=300, first_page=1, last_page=1)
        img = np.array(pages[0])
        img = im_resize(img)
        img_comp = img.copy()

        # NOTE(review): assumes each hierarchy_area row holds the contour
        # index in column 0, its parent index in column 4 and its area in
        # column 5 -- confirm against scoring.detect_objects.
        sorted_indices, contours, hierarchy_area = detect_objects(img)
        border_1 = sorted_indices[0]
        border_2 = sorted_indices[1]
        cs = [random_rgb_color(), random_rgb_color()]

        # Outline both border contours, each in its own random colour.
        for border, border_color in zip((border_1, border_2), cs):
            x, y, w, h = cv2.boundingRect(contours[border])
            cv2.rectangle(img_comp, (x, y), (x + w, y + h), border_color, box_thickness)

        is_large = hierarchy_area[:, 5] > min_area
        has_border_parent = (hierarchy_area[:, 4] == border_1) | (hierarchy_area[:, 4] == border_2)
        parents = hierarchy_area[has_border_parent & is_large]
        cv2.putText(img_comp, f'{len(hierarchy_area[is_large])}', (100, 100), font, font_scale, text_color, text_thickness, cv2.LINE_AA)

        # Row of the horizontal scan line at 80% of the image height; it does
        # not depend on the contour, so it is computed once per image.
        scan_y = int((img_comp.shape[0] * 80) / 100)

        area = 0
        for row in parents:
            index = int(row[0])
            x, y, w, h = cv2.boundingRect(contours[index])

            # Keep only boxes that the scan line passes strictly through.
            if y < scan_y < (y + h):
                # NOTE(review): `index` comes from rows selected by column 4,
                # so `index == border_1` looks like it can rarely be true; if
                # the colour was meant to follow the matching border, compare
                # row[4] instead -- confirm the intent before changing.
                c = cs[0] if index == border_1 else cs[1]
                cv2.rectangle(img_comp, (x, y), (x + w, y + h), c, box_thickness)

                # The borders themselves never count towards the area.
                if index not in [border_2, border_1]:
                    area = area + (w * h)

        score = 1 - (area / ((img.shape[0] * img.shape[1]) / 3))  # Good if 1
        cv2.putText(img_comp, f'{score:.2f} {area:.2f}', (100, 70), font, font_scale, text_color, text_thickness, cv2.LINE_AA)

        # NOTE(review): the array originates from a PIL image while
        # cv2.imwrite interprets channels as BGR; unless im_resize converts
        # the channel order, red and blue are swapped in the saved file --
        # confirm.
        cv2.imwrite(f"{output_dir}/{os.path.basename(f)}.jpg", img_comp)

In [3]:
# Annotate every PDF of the "Good" class.
process(files=good, output_dir="../data/border-touched/good")

100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


In [4]:
# Annotate every PDF of the "Ugly" class.
process(files=ugly, output_dir="../data/border-touched/ugly")

100%|██████████| 25/25 [00:13<00:00,  1.84it/s]
