In [2]:
import glob
import json
import os
import shutil
import time, sys

import numpy as np
import pandas as pd
from IPython.display import clear_output
from sklearn.model_selection import train_test_split

def update_progress(progress):
    """Redraw a 20-character text progress bar in the cell output.

    Parameters
    ----------
    progress : int or float
        Completed fraction. Integers are converted to float, any other
        non-float value is treated as 0, and the result is clamped to the
        range [0, 1].

    The previous cell output is cleared (deferred until new output arrives)
    before the bar is printed, so repeated calls overwrite one another.
    """
    bar_length = 20

    # Normalise the argument: ints become floats, anything non-numeric is 0.
    if isinstance(progress, int):
        fraction = float(progress)
    elif isinstance(progress, float):
        fraction = progress
    else:
        fraction = 0

    # Clamp to the displayable range.
    if fraction < 0:
        fraction = 0
    elif fraction >= 1:
        fraction = 1

    filled = int(round(bar_length * fraction))
    bar = "#" * filled + "-" * (bar_length - filled)

    clear_output(wait = True)
    print("Progress: [{0}] {1:.1f}%".format(bar, fraction * 100))

### Create Dataset

In [3]:
# Annotation labels handled by this notebook. A label's position in the list
# is the numeric class id written to the generated label files.
categories_in_data = ['bicycle', 'phone', 'helmet', 'airbag_helmet']
categories_lookup = {name: class_id for class_id, name in enumerate(categories_in_data)}

# Input locations: one JSON annotation file per frame, plus the source frames.
labels_in_path = "original_labels/"
images_in_path = "original_images/"

# Output locations: one labels/images directory pair per category.
labels_out_path_helmet = "data/helmet/labels/"
images_out_path_helmet = "data/helmet/images/"

labels_out_path_bicycle = "data/bicycle/labels/"
images_out_path_bicycle = "data/bicycle/images/"

labels_out_path_phone = "data/phone/labels/"
images_out_path_phone = "data/phone/images/"

labels_out_path_airbag_helmet = "data/airbag_helmet/labels/"
images_out_path_airbag_helmet = "data/airbag_helmet/images/"


def label_stem(path):
    """Return the file name of `path` without its directory or final extension.

    Only the last extension is removed, so stems that themselves contain
    dots (e.g. ``clip_1.5_frame3.json``) are kept intact, and the platform's
    own path separator is honoured.
    """
    return os.path.splitext(os.path.basename(path))[0]


# Stems of every annotation file, sorted so that the processing order does not
# depend on the order in which the filesystem happens to list the directory.
filenames = sorted(label_stem(a) for a in glob.glob(f'{labels_in_path}*.json'))


In [17]:
# Convert every JSON annotation into per-shape label files.
#
# For each shape with a known label, one text file holding
# "<class id> <c_x> <c_y> <w> <h>" (box centre and size, each divided by the
# image width/height) is written to that label's output directory, and the
# source frame is copied next to it under a matching name.

# Output directories keyed by annotation label: (labels dir, images dir).
output_dirs = {
    'bicycle': (labels_out_path_bicycle, images_out_path_bicycle),
    'helmet': (labels_out_path_helmet, images_out_path_helmet),
    'phone': (labels_out_path_phone, images_out_path_phone),
    'airbag_helmet': (labels_out_path_airbag_helmet, images_out_path_airbag_helmet),
}

# Create the destinations up front so the cell also works on a fresh checkout.
for labels_dir, images_dir in output_dirs.values():
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)

# Labels that have no output directory, with the number of shapes skipped.
skipped_labels = {}

count = 0
for count, filename in enumerate(filenames, start=1):
    # read original annotation
    with open(labels_in_path + filename + ".json") as f:
        data = json.load(f)

    W = data['imageWidth']
    H = data['imageHeight']

    # Source frames live in a sub-directory named after the part of the file
    # name that precedes the first underscore.
    source_image_path = (
        images_in_path + filename.split("_")[0] + "/output_frames/" + filename + ".jpg"
    )

    # Per-image, 1-based running index for each label; it makes the output
    # file names unique when an image contains several shapes of one class.
    file_counters = dict.fromkeys(output_dirs, 1)

    for shape in data['shapes']:
        label = shape['label']
        if label not in output_dirs:
            # Unknown labels used to abort the whole run with a KeyError;
            # skip them and report them once the loop has finished.
            skipped_labels[label] = skipped_labels.get(label, 0) + 1
            continue

        labels_dir, images_dir = output_dirs[label]
        suffix = f"_{file_counters[label]}"
        labelfile_out_path = labels_dir + filename + suffix + ".txt"
        imagefile_out_path = images_dir + filename + suffix + ".jpg"
        file_counters[label] += 1

        # The first two points are opposite corners in arbitrary order.
        # min/max yields top-left / bottom-right for every ordering,
        # including ties on one axis.
        p1, p2 = shape['points'][0], shape['points'][1]
        tl_x, br_x = min(p1[0], p2[0]), max(p1[0], p2[0])
        tl_y, br_y = min(p1[1], p2[1]), max(p1[1], p2[1])

        # find center coordinates, width and height
        w = br_x - tl_x
        h = br_y - tl_y

        c_x = tl_x + w/2
        c_y = tl_y + h/2

        # ratio of image dimensions
        w = w / W
        h = h / H
        c_x = c_x / W
        c_y = c_y / H

        class_value = categories_lookup[label]

        with open(labelfile_out_path, "w") as t:
            t.write(f"{class_value} {c_x} {c_y} {w} {h}")

        shutil.copyfile(source_image_path, imagefile_out_path)

    # Report after the file is done so the final update shows 100%.
    update_progress(count / len(filenames))

if skipped_labels:
    print(f"Skipped shapes with unknown labels: {skipped_labels}")



Progress: [####################] 99.9%


### Create Train-Test-Splits

In [5]:
# Write train.txt / val.txt (80 % / 20 %) for every per-category dataset.
# Each line is `full_path_to_data` followed by the image path as globbed.

# Absolute prefix prepended to every listed image path.
# NOTE(review): this is machine-specific; adjust it when running elsewhere.
full_path_to_data = "/zhome/79/4/146412/cph_helmets/helmet_detector/"

for images_dir in [
    images_out_path_bicycle,
    images_out_path_helmet,
    images_out_path_phone,
    images_out_path_airbag_helmet
]:
    # Sorted, because glob order is filesystem-dependent and the fixed
    # random_state only reproduces the split for an identical input order.
    # A dedicated name is used so the global `filenames` list of annotation
    # stems is not overwritten.
    image_paths = sorted(glob.glob(images_dir + "*.jpg"))

    if not image_paths:
        print(f"No images found in {images_dir}; skipping split.")
        continue

    if len(image_paths) < 2:
        # train_test_split raises when either side of the split would be
        # empty, so a single image goes to the training list.
        train_paths, val_paths = image_paths, []
    else:
        train_paths, val_paths = train_test_split(
            image_paths, test_size = 0.2, random_state = 42
        )

    # "data/<category>/images/" -> "data/<category>"
    dataset_dir = "/".join(images_dir.split("/")[:-2])

    with open(dataset_dir + "/train.txt", "w") as t:
        t.writelines(full_path_to_data + path + "\n" for path in train_paths)

    with open(dataset_dir + "/val.txt", "w") as t:
        t.writelines(full_path_to_data + path + "\n" for path in val_paths)

    