Transforms the HELMET data set such that darknet can use it for training

For `classes`, choose one of the three possible configurations: `singleclass`, `multiclass` or `fullclass` (see the class dictionaries further down).

`original_dir` is the path to the HELMET data folder, i.e. the folder that directly contains the `annotation` and `image` folders and the file `data_split.csv`. The annotations and all image parts must be unzipped beforehand.

`target_dir` is the path to the folder where the transformed data set will be written. Using `target_dir` = `original_dir` has not been tested. If you want to try it, consider commenting out the calls to `shutil.copy2`, since the images would not need to be copied.

`hpc_dir` is the path to the location where the transformed data will be placed on HPC. This must be accurate or darknet won't be able to find the images. In case of a mistake, doing "find and replace" on `train.txt`, `test.txt` and `validation.txt` should remedy the issue

In [1]:
import os
import csv
from collections import OrderedDict
import shutil
import zipfile
import pandas as pd

In [2]:
# Label configuration to generate: "singleclass", "multiclass" or "fullclass".
classes = "singleclass"

# Source data set, local output folder, and the folder the output will live in
# on the HPC cluster (the latter is only written into the image list files).
original_dir = "D:/Data/Helmet_Dataset/"
target_dir = "D:/Data/Helmet_Dataset_{}/".format(classes)
hpc_dir = "/work3/s174508/Helmet_Dataset_{}/".format(classes)

In [3]:
# All annotation labels, listed once and grouped by which position tokens
# (D, P0, P1, P2, P3) occur in the label string. The order of the groups and
# of the labels inside each group defines the class ids below, so do not
# reorder entries.
_LABEL_GROUPS = (
    # D only
    (
        "DHelmet",
        "DNoHelmet",
    ),
    # D + P0
    (
        "DHelmetP0Helmet",
        "DHelmetP0NoHelmet",
        "DNoHelmetP0NoHelmet",
    ),
    # D + P0 + P1
    (
        "DHelmetP0HelmetP1Helmet",
        "DHelmetP0NoHelmetP1Helmet",
        "DHelmetP0NoHelmetP1NoHelmet",
        "DNoHelmetP0HelmetP1NoHelmet",
        "DNoHelmetP0NoHelmetP1Helmet",
        "DNoHelmetP0NoHelmetP1NoHelmet",
    ),
    # D + P0 + P1 + P2
    (
        "DHelmetP0HelmetP1HelmetP2Helmet",
        "DHelmetP0HelmetP1NoHelmetP2Helmet",
        "DHelmetP0HelmetP1NoHelmetP2NoHelmet",
        "DHelmetP0NoHelmetP1HelmetP2Helmet",
        "DHelmetP0NoHelmetP1NoHelmetP2Helmet",
        "DHelmetP0NoHelmetP1NoHelmetP2NoHelmet",
        "DNoHelmetP0NoHelmetP1NoHelmetP2Helmet",
        "DNoHelmetP0NoHelmetP1NoHelmetP2NoHelmet",
    ),
    # D + P0 + P1 + P2 + P3
    (
        "DHelmetP0NoHelmetP1NoHelmetP2NoHelmetP3Helmet",
        "DHelmetP0NoHelmetP1NoHelmetP2NoHelmetP3NoHelmet",
        "DNoHelmetP0NoHelmetP1NoHelmetP2NoHelmetP3NoHelmet",
    ),
    # D + P1
    (
        "DHelmetP1Helmet",
        "DHelmetP1NoHelmet",
        "DNoHelmetP1Helmet",
        "DNoHelmetP1NoHelmet",
    ),
    # D + P1 + P2
    (
        "DHelmetP1HelmetP2Helmet",
        "DHelmetP1HelmetP2NoHelmet",
        "DHelmetP1NoHelmetP2Helmet",
        "DHelmetP1NoHelmetP2NoHelmet",
        "DNoHelmetP1HelmetP2Helmet",
        "DNoHelmetP1NoHelmetP2Helmet",
        "DNoHelmetP1NoHelmetP2NoHelmet",
    ),
    # D + P1 + P2 + P3
    (
        "DHelmetP1NoHelmetP2NoHelmetP3Helmet",
        "DHelmetP1NoHelmetP2NoHelmetP3NoHelmet",
        "DNoHelmetP1NoHelmetP2NoHelmetP3NoHelmet",
    ),
)

# dict_classes maps an annotation label to the class id written to the
# darknet label files.
if classes == "singleclass":
    # Every label collapses into the single class 0.
    dict_classes = {label: 0 for group in _LABEL_GROUPS for label in group}
elif classes == "multiclass":
    # One class per group above (ids 0-7).
    dict_classes = {
        label: group_id
        for group_id, group in enumerate(_LABEL_GROUPS)
        for label in group
    }
elif classes == "fullclass":
    # One class per distinct label (ids 0-35), numbered in listing order.
    dict_classes = {
        label: class_id
        for class_id, label in enumerate(
            label for group in _LABEL_GROUPS for label in group
        )
    }
else:
    # Fail here instead of leaving dict_classes unset and crashing later,
    # in the middle of the conversion, with an unrelated-looking error.
    raise ValueError(
        "Unknown value for `classes`: {!r} "
        "(expected 'singleclass', 'multiclass' or 'fullclass')".format(classes)
    )

In [4]:
def get_subpath(folder_name):
    """Locate a video folder inside the ``image/part_1`` .. ``image/part_7`` folders.

    Args:
        folder_name: Name of the folder to look for below
            ``<original_dir>image/part_<n>/``.

    Returns:
        The relative path ``"part_<n>/<folder_name>/"`` for the lowest ``n``
        (1-7) under which the folder exists.

    Raises:
        NotADirectoryError: If none of the seven parts contains the folder.
    """
    candidates = (
        "part_{}/{}".format(part, folder_name) for part in range(1, 8)
    )
    located = next(
        (rel for rel in candidates if os.path.isdir(original_dir + "image/" + rel)),
        None,
    )
    if located is None:
        raise NotADirectoryError("No directory matching " + folder_name)
    return located + "/"

In [5]:
def make_annotation_files(subpath, folder_name, image_width=1920, image_height=1080):
    """Write one darknet label file per annotated frame of a video folder.

    Reads ``<original_dir>annotation/<folder_name>.csv`` and, for every frame
    that has at least one row, writes ``<target_dir><subpath>NN.txt`` where
    ``NN`` is the frame number zero-padded to two digits. Each CSV row becomes
    one line ``<class> <x_center> <y_center> <width> <height>``, where the
    class id comes from ``dict_classes`` and the box values are divided by the
    image size.

    Args:
        subpath: Folder below ``target_dir`` (with trailing slash) that
            receives the label files; it must already exist.
        folder_name: Name of the video folder; selects the annotation CSV.
        image_width: Image width in pixels used for normalisation.
        image_height: Image height in pixels used for normalisation.

    Returns:
        A list of 100 entries indexed by ``frame_id - 1``. An entry is the
        (already closed) file object of the label file for frames that have
        annotations and ``None`` otherwise; callers test entries for
        truthiness only.

    Raises:
        ValueError: If a ``frame_id`` lies outside 1..100 or a numeric column
            cannot be parsed.
        KeyError: If a row's label is not a key of ``dict_classes``.
    """
    max_frames = 100
    files = [None] * max_frames

    try:
        # newline="" is what the csv module asks for when opening its input.
        with open(original_dir + "annotation/" + folder_name + ".csv", newline="") as csv_file:
            for row in csv.DictReader(csv_file):
                frame = int(row["frame_id"])
                # Reject out-of-range ids explicitly: 0 or negative values
                # would otherwise wrap around and land in the wrong slot.
                if not 1 <= frame <= max_frames:
                    raise ValueError(
                        "frame_id {} in {}.csv is outside 1..{}".format(
                            frame, folder_name, max_frames
                        )
                    )

                # Open the label file lazily, the first time a frame is seen.
                if files[frame - 1] is None:
                    files[frame - 1] = open(
                        "{}{}{:02d}.txt".format(target_dir, subpath, frame), "w"
                    )

                label = dict_classes[row["label"]]
                box_w = int(row["w"])
                box_h = int(row["h"])
                # The CSV stores the top-left corner; the label file gets the
                # box centre, all values relative to the image size.
                x = (int(row["x"]) + box_w / 2) / image_width
                y = (int(row["y"]) + box_h / 2) / image_height
                w = box_w / image_width
                h = box_h / image_height
                files[frame - 1].write("{} {} {} {} {}\n".format(label, x, y, w, h))
    finally:
        # Close every label file even when a row raised, so no handle leaks
        # and everything written so far reaches the disk.
        for label_file in files:
            if label_file is not None:
                label_file.close()

    return files

In [6]:
def convert_data():
    """Convert the whole data set into the folder layout used for darknet training.

    Reads ``<original_dir>data_split.csv`` (first column: video folder name,
    second column: ``training``, ``test`` or ``validation``; the first line is
    treated as a header and skipped). For every listed folder it

    * creates ``<target_dir>part_<n>/<folder>/``,
    * writes the label files via ``make_annotation_files``,
    * copies the image of every annotated frame next to its label file, and
    * appends ``<hpc_dir>part_<n>/<folder>/<NN>.jpg`` to ``train.txt``,
      ``test.txt`` or ``validation.txt`` in ``target_dir``.

    Folders whose split name is none of the three known values are still
    converted and copied, but are not added to any list file.

    Raises:
        FileExistsError: If ``target_dir`` (or a folder inside it) already
            exists; existing output is never overwritten.
        NotADirectoryError: If a listed folder is not found in any image part.
    """
    # Open the split file first so that a wrong `original_dir` fails before
    # anything is created on disk.
    with open(original_dir + "data_split.csv", "r", newline="") as data_split:
        os.mkdir(target_dir)
        for part in range(1, 8):
            os.mkdir(target_dir + "part_" + str(part))

        with open(target_dir + "train.txt", "w") as train, \
                open(target_dir + "test.txt", "w") as test, \
                open(target_dir + "validation.txt", "w") as validation:
            list_file_by_split = {
                "training": train,
                "test": test,
                "validation": validation,
            }

            rows = csv.reader(data_split)
            next(rows, None)  # skip the header line

            for row in rows:
                if len(row) < 2:
                    # Blank or truncated line: nothing to convert.
                    continue

                folder_name = row[0]
                # Strip so that the match does not depend on line endings or
                # stray whitespace after the split name.
                folder_set = row[1].strip()
                subpath = get_subpath(folder_name)  # part_x/folder_name/

                # Make annotation files
                os.mkdir(target_dir + subpath)
                files = make_annotation_files(subpath, folder_name)

                # Copy relevant image files and fill the list file of the split
                list_file = list_file_by_split.get(folder_set)
                for frame, annotation_file in enumerate(files, start=1):
                    if not annotation_file:
                        continue  # frame has no annotations

                    # Images are named by frame number, zero-padded to 2 digits.
                    image_name = "{:02d}.jpg".format(frame)
                    shutil.copy2(
                        original_dir + "image/" + subpath + image_name,
                        target_dir + subpath + image_name,
                    )
                    if list_file is not None:
                        list_file.write(hpc_dir + subpath + image_name + "\n")

In [7]:
# Run the conversion only when this code is executed directly (as a notebook
# cell or a script), not when it is imported for its helper functions.
if __name__ == "__main__":
    convert_data()