In [1]:
import sys
import os
import json
import glob
import cv2
import tqdm
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ExifTags
import numpy as np
from PIL import ImageOps

In [2]:
# Object category names for the dataset.
CLASSES = ["numberplate"]
# Dataset splits to convert, in processing order.
STATES = ["val", "train"]

# Output locations; "{}" is filled with the split name via str.format.
PATH_TO_RES_ANN = "./npdata/labels/{}"
PATH_TO_RES_IMAGES = "./npdata/images/{}"

# Input locations; the VIA annotation JSON sits inside each split's image folder.
PATH_TO_IMAGES = "./autoriaNumberplateDataset-2021-03-01/{}"
PATH_TO_JSON = PATH_TO_IMAGES + "/via_region_data.json"

In [3]:
%matplotlib inline
plt.rcParams["figure.figsize"] = (20,10)

In [4]:
def rotate_image_by_exif(image):
    """
    Re-orient an image according to its EXIF orientation tag.

    For orientation values 3, 6 and 8 the image is rotated by 180, 270 and
    90 degrees respectively (with ``expand=True``) and then passed through
    ``ImageOps.mirror``. In every other case (no EXIF data, no orientation
    tag, another orientation value, or an ``AttributeError`` raised along
    the way) the image is returned as it currently is.

    Parameters
    ----------
    image
        Object exposing ``_getexif()`` and ``rotate()``; presumably a
        ``PIL.Image.Image`` as produced by ``Image.open``.

    Returns
    -------
    The transformed image, or the input image when nothing applies.
    """
    orientation_tag = 274  # key of orientation ExifTags
    # (orientation value, counter-clockwise angle handed to image.rotate)
    rotations = ((3, 180), (6, 270), (8, 90))
    try:
        raw_exif = image._getexif()
        if raw_exif is None:
            return image
        exif = dict(raw_exif.items())
        if orientation_tag not in exif:
            return image
        for value, angle in rotations:
            if exif[orientation_tag] == value:
                image = image.rotate(angle, expand=True)
                # NOTE(review): EXIF values 3/6/8 describe pure rotations, yet the
                # image is also mirrored here. Kept as-is; confirm this is intended.
                image = ImageOps.mirror(image)
                break
    except AttributeError:
        # Best effort: images without EXIF support are returned untouched.
        pass
    return image

In [5]:
def load_annotations(path_to_res_ann, 
                     path_to_res_images, 
                     path_to_images, 
                     path_to_json, classes = ['numberplate'], debug=True):
    """
    Convert a VIA annotation file into YOLO label files plus image copies.

    Every image listed under ``_via_img_metadata`` is opened, re-oriented with
    ``rotate_image_by_exif`` and each polygon region is reduced to its
    axis-aligned bounding box, stored as one
    ``class x_center y_center width height`` line normalised by the image size.

    An image is flagged as corrupted when any normalised value of any of its
    boxes is >= 1. With ``debug=True``, or for corrupted images, nothing is
    written: the label path and label text are printed and the image is shown
    with matplotlib. Otherwise the label file and an OpenCV re-encoded copy of
    the image are written to the result directories.

    Parameters
    ----------
    path_to_res_ann : str
        Directory receiving one ``<image name>.txt`` label file per image.
    path_to_res_images : str
        Directory receiving the re-encoded images.
    path_to_images : str
        Directory containing the source images.
    path_to_json : str
        Path to the VIA JSON file describing the images and their regions.
    classes : list of str
        Category names. Currently unused: every region gets class id 0.
    debug : bool
        When True, preview every image instead of writing any output.
    """
    with open(path_to_json) as ann:
        via_metadata = json.load(ann)["_via_img_metadata"]

    if not debug:
        # Create the output folders up front so the first write cannot fail
        # on a missing directory.
        os.makedirs(path_to_res_ann, exist_ok=True)
        os.makedirs(path_to_res_images, exist_ok=True)

    for _id in tqdm.tqdm(via_metadata):
        entry = via_metadata[_id]
        is_corrupted = False
        image_id = entry["filename"]
        filename = f'{path_to_images}/{image_id}'
        # The context manager releases the file handle once the pixels are copied.
        with Image.open(filename) as src_image:
            pil_image = rotate_image_by_exif(src_image)
            # Force three RGB channels so grayscale, palette and RGBA inputs
            # all produce a (height, width, 3) array.
            image = np.array(pil_image.convert("RGB"))
        height, width = image.shape[:2]

        to_txt_data = []
        for region in entry["regions"]:
            label_id = 0  # single class: every region is written with id 0
            shape_attributes = region["shape_attributes"]
            xs = shape_attributes.get("all_points_x")
            ys = shape_attributes.get("all_points_y")
            # Skip regions that are not polygons or that carry no points.
            if not xs or not ys:
                continue
            bbox = [min(xs), min(ys), max(xs), max(ys)]

            w = bbox[2] - bbox[0]
            h = bbox[3] - bbox[1]

            mx = bbox[0] + w / 2
            my = bbox[1] + h / 2

            # class x_center y_center width height
            yolo_bbox = [label_id, mx / width, my / height, w / width, h / height]
            if any(value >= 1 for value in yolo_bbox[1:]):
                print("[corrupted]", filename, width, height)
                print(bbox)
                print(yolo_bbox)
                is_corrupted = True
            to_txt_data.append(" ".join(str(item) for item in yolo_bbox))
            if debug or is_corrupted:
                cv2.rectangle(image,
                              (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              (0, 120, 255),
                              3)

        # Derive the label name from the same basename used for the image copy
        # so the two files always pair up.
        image_basename = os.path.basename(filename)
        res_path = f'{path_to_res_ann}/{os.path.splitext(image_basename)[0]}.txt'
        if debug or is_corrupted:
            print(res_path)
            print("\n".join(to_txt_data))
            print("______________________")
            plt.imshow(image)
            plt.show()
        else:
            with open(res_path, "w") as wFile:
                wFile.write("\n".join(to_txt_data))
            image_path = os.path.join(path_to_res_images, image_basename)
            # The array is RGB (from PIL) while cv2.imwrite expects BGR; without
            # the conversion the saved image has red and blue swapped.
            if not cv2.imwrite(image_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR)):
                print("[write failed]", image_path)

<img src="https://user-images.githubusercontent.com/26833433/98809572-0bc4d580-241e-11eb-844e-eee756f878c2.png">

In [None]:
# Convert every dataset split. debug=False makes load_annotations write the
# label files and images instead of only previewing them.
for state in STATES:
    (path_to_res_ann,
     path_to_res_images,
     path_to_json,
     path_to_images) = [
        template.format(state)
        for template in (PATH_TO_RES_ANN, PATH_TO_RES_IMAGES, PATH_TO_JSON, PATH_TO_IMAGES)
    ]

    load_annotations(
        path_to_res_ann=path_to_res_ann,
        path_to_res_images=path_to_res_images,
        path_to_images=path_to_images,
        path_to_json=path_to_json,
        debug=False,
    )

100%|██████████| 376/376 [00:09<00:00, 37.81it/s]
 30%|███       | 1443/4776 [00:36<01:30, 36.77it/s]

In [None]:
# deleted 369353199-28729250.jpeg


In [24]:
# Scratch expression unrelated to the dataset conversion (presumably a kernel liveness check).
1+1

2