In [3]:
import numpy as np
import pandas as pd
import os
import yaml

In [4]:
def _read_tsv_rows(tsv_path):
    '''
    reads a headerless tab separated file into a list of rows
    input:
        tsv_path: path of the tsv file (str)
    output:
        rows: one list of cell values per line, [] when the file is empty (list)
    '''
    try:
        return pd.read_csv(tsv_path, sep='\t', header=None).values.tolist()
    except pd.errors.EmptyDataError:
        # read_csv raises on a file with no content; treat it as "no objects"
        return []


def convert_to_yolo_format(input_path,output_path, image_dims = [720,576], unique_labels = None):
    '''
    converts the mesad format annotations into yolo format
    every '<name>.bboxes.labels.tsv' / '<name>.bboxes.tsv' pair in input_path
    produces one '<name>.txt' in output_path with one line per box:
        class_id x_center y_center width height   (all normalised by image_dims)
    input:
        input_path: path of the input annotations (str)
        output_path: path to save the output yolo format txt files (str)
        image_dims: the [width, height] of the images in pixels (list)
        unique_labels: optional labels whose positions are reused as class ids,
                       e.g. the list returned for another split; it is copied,
                       never modified (list or None)
    output:
        unique_labels: the unique labels in the dataset, index == class id (list)
    raises:
        ValueError: if image_dims is not two positive numbers, or a boxes file
                    has fewer rows than its labels file
    '''
    labels_suffix = '.bboxes.labels.tsv'
    boxes_suffix = '.bboxes.tsv'

    # Fail early with a readable message instead of a TypeError in the middle of
    # writing a label file (e.g. when a label list is passed in this slot).
    try:
        img_width = float(image_dims[0])
        img_height = float(image_dims[1])
    except (TypeError, ValueError, IndexError):
        raise ValueError('image_dims must be [width, height] in pixels, got {!r}'.format(image_dims))
    if img_width <= 0 or img_height <= 0:
        raise ValueError('image_dims must be positive, got {!r}'.format(image_dims))

    # Work on a copy so the caller's list is left untouched.
    known_labels = [] if unique_labels is None else list(unique_labels)

    # os.listdir returns names in arbitrary order; sorting keeps the class ids
    # (assigned on first appearance) reproducible between runs and machines.
    for file_name in sorted(os.listdir(input_path)):
        if not file_name.endswith(labels_suffix):
            continue
        # Strip the full suffix rather than splitting on '.', so image names
        # that themselves contain dots keep their complete stem.
        stem = file_name[:-len(labels_suffix)]

        labels = [row[0] for row in _read_tsv_rows(os.path.join(input_path, stem + labels_suffix))]
        boxes = _read_tsv_rows(os.path.join(input_path, stem + boxes_suffix))
        if len(boxes) < len(labels):
            raise ValueError('{}: {} labels but only {} boxes'.format(stem, len(labels), len(boxes)))

        yolo_lines = []
        for i, label in enumerate(labels):
            if label not in known_labels:
                known_labels.append(label)
            class_id = known_labels.index(label)
            # the four box columns are read as x_min, y_min, x_max, y_max
            x_min, y_min, x_max, y_max = boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]
            b_center_x = (x_min + x_max) / 2 / img_width
            b_center_y = (y_min + y_max) / 2 / img_height
            b_width    = (x_max - x_min) / img_width
            b_height   = (y_max - y_min) / img_height
            yolo_lines.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))

        os.makedirs(output_path, exist_ok=True)
        # 'w' (not 'a'): re-running the conversion must replace the label file,
        # otherwise every run appends a duplicate set of boxes.
        with open(os.path.join(output_path, stem + '.txt'), 'w') as txtfile:
            for line in yolo_lines:
                txtfile.write('{}\n'.format(line))
    return known_labels

In [5]:
# Training split: MESAD annotation files in, YOLO label files out.
train_input_label_path = 'data/mesad-real/train/annotations/'
train_output_label_path = 'data/mesad-real/train/labels/'
# The returned list is in class-id order (position in the list == id written to the txt files).
train_unique_labels = convert_to_yolo_format(
    input_path=train_input_label_path,
    output_path=train_output_label_path,
)

In [6]:
# Validation split: MESAD annotation files in, YOLO label files out.
val_input_label_path = 'data/mesad-real/val/annotations/'
val_output_label_path = 'data/mesad-real/val/labels/'
# The third positional parameter of convert_to_yolo_format is image_dims, so the
# training label list must not be passed there: it would be used as
# [width, height] when normalising the boxes.
val_unique_labels = convert_to_yolo_format(val_input_label_path, val_output_label_path)
# Class ids are positions in the label list built during each call, so the
# validation ids only agree with the training ids when the validation labels
# form a prefix of the training labels. Stop here rather than train on
# mismatched ids.
assert val_unique_labels == train_unique_labels[:len(val_unique_labels)], (
    'validation class ids do not line up with the training class ids: '
    '{} vs {}'.format(val_unique_labels, train_unique_labels)
)

In [7]:
def create_training_yaml_file(train_file_path, val_file_path, unique_labels, output_file = 'mesad-real.yml'):
    '''
    auto generates the yaml file needed by yolo v5 for training
    the file holds four keys: train, val, nc (number of classes) and names
    input:
        train_file_path: the path of the training files (str)
        val_file_path: the path of the validation files (str)
        unique_labels: the unique labels in the dataset, index == class id (list)
        output_file: where the yaml file is written, defaults to
                     'mesad-real.yml' in the working directory (str)
    '''
    # Copy into a plain list: yaml.dump tags tuples with a python-specific tag
    # instead of writing a normal yaml sequence, and len() needs a sized object.
    names = list(unique_labels)
    data = dict(
        train = train_file_path,
        val = val_file_path,
        nc = len(names),
        names = names,
    )
    with open(output_file, 'w') as ymlfile:
        yaml.dump(data, ymlfile, default_flow_style=False)

In [8]:
# Write the dataset description consumed by the YOLOv5 training script.
# NOTE(review): the train/val entries of a YOLOv5 dataset yaml normally point at
# image directories; these are the label output directories - confirm.
create_training_yaml_file(
    train_file_path=train_output_label_path,
    val_file_path=val_output_label_path,
    unique_labels=train_unique_labels,
)