<a href="https://colab.research.google.com/github/skj092/Lab/blob/main/bdd_data_processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Downloading the dataset

In [2]:
!cp kaggle.json /root/.kaggle 

In [3]:
!kaggle datasets download -d solesensei/solesensei_bdd100k

Downloading solesensei_bdd100k.zip to /content
100% 7.60G/7.61G [00:53<00:00, 224MB/s]
100% 7.61G/7.61G [00:53<00:00, 154MB/s]


In [6]:
!unzip -q /content/solesensei_bdd100k.zip

In [7]:
!rm /content/solesensei_bdd100k.zip

In [9]:
!rm -r /content/bdd100k_seg

# Formatting the dataset

In [11]:
!mkdir labels
!mkdir labels/valids/
!mkdir labels/trains/

In [13]:
import os
import json
import argparse
from tqdm import tqdm



def bdd2coco_detection(id_dict, labeled_images, fn):

    images = list()
    annotations = list()

    counter = 0
    for i in tqdm(labeled_images):
        counter += 1
        image = dict()
        image['file_name'] = i['name']
        image['height'] = 720
        image['width'] = 1280

        image['id'] = counter

        empty_image = True

        for label in i['labels']:
            annotation = dict()
            category=label['category']
            if (category == "traffic light"):
                color = label['attributes']['trafficLightColor']
                category = "tl_" + color
            if category in id_dict.keys():
                empty_image = False
                annotation["iscrowd"] = 0
                annotation["image_id"] = image['id']
                x1 = label['box2d']['x1']
                y1 = label['box2d']['y1']
                x2 = label['box2d']['x2']
                y2 = label['box2d']['y2']
                annotation['bbox'] = [x1, y1, x2-x1, y2-y1]
                annotation['area'] = float((x2 - x1) * (y2 - y1))
                annotation['category_id'] = id_dict[category]
                annotation['ignore'] = 0
                annotation['id'] = label['id']
                annotation['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
                annotations.append(annotation)

        if empty_image:
            continue

        images.append(image)

    attr_dict["images"] = images
    attr_dict["annotations"] = annotations
    attr_dict["type"] = "instances"

    print('saving...')
    json_string = json.dumps(attr_dict)
    with open(fn, "w") as file:
        file.write(json_string)


if __name__ == '__main__':

    label_dir="/content/bdd100k_labels_release/bdd100k/labels"
    save_path="labels/"

    attr_dict = dict()
    attr_dict["categories"] = [
        {"supercategory": "none", "id": 1, "name": "person"},
        {"supercategory": "none", "id": 2, "name": "rider"},
        {"supercategory": "none", "id": 3, "name": "car"},
        {"supercategory": "none", "id": 4, "name": "bus"},
        {"supercategory": "none", "id": 5, "name": "truck"},
        {"supercategory": "none", "id": 6, "name": "bike"},
        {"supercategory": "none", "id": 7, "name": "motor"},
        {"supercategory": "none", "id": 8, "name": "tl_green"},
        {"supercategory": "none", "id": 9, "name": "tl_red"},
        {"supercategory": "none", "id": 10, "name": "tl_yellow"},
        {"supercategory": "none", "id": 11, "name": "tl_none"},
        {"supercategory": "none", "id": 12, "name": "traffic sign"},
        {"supercategory": "none", "id": 13, "name": "train"}
    ]

    attr_id_dict = {i['name']: i['id'] for i in attr_dict['categories']}

    # create BDD training set detections in COCO format
    print('Loading training set...')
    with open(os.path.join(label_dir,
                           'bdd100k_labels_images_train.json')) as f:
        train_labels = json.load(f)
    print('Converting training set...')

    out_fn = os.path.join(save_path,
                          'bdd100k_labels_images_det_coco_train.json')
    bdd2coco_detection(attr_id_dict, train_labels, out_fn)

    print('Loading validation set...')
    # create BDD validation set detections in COCO format
    with open(os.path.join(label_dir,
                           'bdd100k_labels_images_val.json')) as f:
        val_labels = json.load(f)
    print('Converting validation set...')

    out_fn = os.path.join(save_path,
                          'bdd100k_labels_images_det_coco_val.json')
    bdd2coco_detection(attr_id_dict, val_labels, out_fn)

Loading training set...
Converting training set...


100%|██████████| 69863/69863 [00:16<00:00, 4252.21it/s]


saving...
Loading validation set...
Converting validation set...


100%|██████████| 10000/10000 [00:01<00:00, 9908.14it/s]


saving...


In [14]:
#-*-coding:utf-8-*-
# python3 example.py --datasets COCO --img_path /home/iav/code/bdd100k_yolov5/bdd100k_images/bdd100k/images/100k/train --label labels/bdd100k_labels_images_det_coco_val.json --convert_output_path train_labels/ --img_type ".jpg" --manipast_path ./ --cls_list_file bdd100k.names

import os
from xml.etree.ElementTree import dump
import json
import pprint

import argparse

from format import COCO,YOLO

def main(config):

    if config["datasets"] == "COCO":
        coco = COCO()
        yolo = YOLO(os.path.abspath(config["cls_list"]))

        flag, data = coco.parse(config["label"])

        if flag == True:
            flag, data = yolo.generate(data)

            if flag == True:
                flag, data = yolo.save(data, config["output_path"], config["img_path"],
                                        config["img_type"], config["manipast_path"])

                if flag == False:
                    print("Saving Result : {}, msg : {}".format(flag, data))

            else:
                print("YOLO Generating Result : {}, msg : {}".format(flag, data))

        else:
            print("COCO Parsing Result : {}, msg : {}".format(flag, data))

    else:
        print("Unkwon Datasets")

if __name__ == '__main__':

#     config ={
#         "datasets": "COCO",
#         "img_path": "bdd100k_images/bdd100k/images/100k/train",
#         "label": "labels/bdd100k_labels_images_det_coco_train.json",
#         "img_type": ".jpg",
#         "manipast_path": "./",
#         "output_path": "labels/trains/",
#         "cls_list": "data/bdd100k.names",
#     }
    config ={
        "datasets": "COCO",
        "img_path": "/content/bdd100k/bdd100k/images/100k/val",
        "label": "/content/labels/bdd100k_labels_images_det_coco_val.json",
        "img_type": ".jpg",
        "manipast_path": "./",
        "output_path": "labels/valids/",
        "cls_list": "bdd100k.names",
    }

    main(config)


COCO Parsing:  |████████████████████████████████████████| 100.0% (185526/185526)  Complete


YOLO Generating:|████████████████████████████████████████| 100.0% (10000/10000)  Complete


YOLO Saving:   |████████████████████████████████████████| 100.0% (10000/10000)  Complete



In [None]:
#-*-coding:utf-8-*-
# python3 example.py --datasets COCO --img_path /home/iav/code/bdd100k_yolov5/bdd100k_images/bdd100k/images/100k/train --label labels/bdd100k_labels_images_det_coco_val.json --convert_output_path train_labels/ --img_type ".jpg" --manipast_path ./ --cls_list_file bdd100k.names

import os
from xml.etree.ElementTree import dump
import json
import pprint

import argparse

from format import COCO,YOLO

def main(config):

    if config["datasets"] == "COCO":
        coco = COCO()
        yolo = YOLO(os.path.abspath(config["cls_list"]))

        flag, data = coco.parse(config["label"])

        if flag == True:
            flag, data = yolo.generate(data)

            if flag == True:
                flag, data = yolo.save(data, config["output_path"], config["img_path"],
                                        config["img_type"], config["manipast_path"])

                if flag == False:
                    print("Saving Result : {}, msg : {}".format(flag, data))

            else:
                print("YOLO Generating Result : {}, msg : {}".format(flag, data))

        else:
            print("COCO Parsing Result : {}, msg : {}".format(flag, data))

    else:
        print("Unkwon Datasets")

if __name__ == '__main__':

    config ={
        "datasets": "COCO",
        "img_path": "/content/bdd100k/bdd100k/images/100k/train",
        "label": "/content/labels/bdd100k_labels_images_det_coco_train.json",
        "img_type": ".jpg",
        "manipast_path": "./",
        "output_path": "labels/trains/",
        "cls_list": "bdd100k.names",
    }


    main(config)

In [21]:
import glob, os, shutil

'''
Sometimes your image data set might not match with your label data set.
This code does the folowing
(1) Go through your image data set
(2) Search if the corresponding label file exist in the label data set. 
(3) If not, remove current image
'''


def copy_filter(label_dir,image_dir,target_dir_images,target_dir_labels):
    for image in os.listdir(image_dir):
        if image.endswith('jpg'):
            image_name = os.path.splitext(image)[0]

            # Corresponding label file name
            label_name = image_name + '.txt'
            image_path = image_dir + '/' + image_name + '.jpg'
            if os.path.isfile(label_dir + '/' + label_name) == False:
                print(" -- DELETE IMAGE [Label file not found -- ]")
                
                print(image_path)
#                 os.remove(image_path)
#             else:
                target_images=target_dir_images+ '/' + image_name + '.jpg'
                shutil.copy(image_path,target_dir_images )
                print(" --COPY IMAGE "+target_images)


    for label in os.listdir(label_dir):
        if label.endswith('.txt'):
            label_name = os.path.splitext(label)[0]

            # Corresponding label file name
            image_name = label_name + '.jpg'
            label_path = label_dir + '/' + label_name + '.txt'
            if os.path.isfile(image_dir + '/' + image_name) == False:
                print(" -- DELETE LABEL [Image file not found -- ]")
                print(label_path)
#                 os.remove(label_path)
#             else:
                target_labels=target_dir_labels+ '/' + label_name + '.txt'
                shutil.copy(label_path,target_labels )
                print(" --COPY lABELS "+target_labels)

In [22]:
!mkdir bdd100k/images/
!mkdir bdd100k/labels/

mkdir: cannot create directory ‘bdd100k/images/’: File exists
mkdir: cannot create directory ‘bdd100k/labels/’: File exists


In [23]:
!mkdir bdd100k/images/trains
!mkdir bdd100k/labels/trains
!mkdir bdd100k/images/valids
!mkdir bdd100k/labels/valids

mkdir: cannot create directory ‘bdd100k/images/trains’: File exists
mkdir: cannot create directory ‘bdd100k/labels/trains’: File exists
mkdir: cannot create directory ‘bdd100k/images/valids’: File exists
mkdir: cannot create directory ‘bdd100k/labels/valids’: File exists


In [24]:
# label_dir = '/labels/trains'
# image_dir = '/content/bdd100k/bdd100k/images/100k/train'
# target_dir_images="bdd100k/images/trains"
# target_dir_labels="bdd100k/labels/trains"
# copy_filter(label_dir,image_dir,target_dir_images,target_dir_labels)

In [30]:
label_dir = '/content/labels/valids'
image_dir = '/content/bdd100k/bdd100k/images/100k/val'
target_dir_images="bdd100k/images/valids"
target_dir_labels="bdd100k/labels/valids"
copy_filter(label_dir,image_dir,target_dir_images,target_dir_labels)

In [29]:
labels_list=glob.glob("/content/labels/valids"+"/"+"*.txt")
images_list=glob.glob("bdd100k/images/valids"+"/"+"*.jpg")
print(len(labels_list))
print(len(images_list))

# labels_list2=glob.glob("bdd100k/labels/trains"+"/"+"*.txt")
# images_list2=glob.glob("bdd100k/images/trains"+"/"+"*.jpg")
# print(len(labels_list2))
# print(len(images_list2))

10000
10000


In [None]:
# copy test_img folder
!cp -r bdd100k_images/bdd100k/images/100k/test/ bdd100k/images/