# Labels Analysis

In [1]:
import os
import json
import pandas as pd
import numpy as np
from collections import Counter

### Opening JSON files (labels)

In [3]:
# Opening JSON file: Training set
# The context manager closes the file even if json.load raises. The encoding
# is pinned to UTF-8 (the standard JSON encoding) instead of being left to the
# platform default.
with open('ComputerVision/workspace/labels/bdd100k_labels_images_train.json', encoding='utf-8') as f:
  data_labels_train = json.load(f)

In [4]:
# Opening JSON file: Validation set
# The context manager closes the file even if json.load raises. The encoding
# is pinned to UTF-8 (the standard JSON encoding) instead of being left to the
# platform default.
with open('ComputerVision/workspace/labels/bdd100k_labels_images_val.json', encoding='utf-8') as f:
  data_labels_val = json.load(f)

In [5]:
# Report how many top-level entries each loaded label file contains.
# The templates are padded so that both lines align on the dash.
for template, split_labels in (
  ('     Train - labels: {}', data_labels_train),
  ('Validation - labels: {}', data_labels_val),
):
  print(template.format(len(split_labels)))

     Train - labels: 69863
Validation - labels: 10000


### Objects in labels (total counts)

In [6]:
# Count categories: Training set
# Flatten the per-image 'labels' lists into one list holding the 'category'
# of every labelled object, in file order.
lst_category_train = [
  label['category']
  for image in data_labels_train
  for label in image['labels']
]

print('\033[1mTraining set\033[0m')
# The last expression is displayed by the notebook: occurrences per category.
Counter(lst_category_train)

[1mTraining set[0m


Counter({'traffic light': 186117,
         'traffic sign': 239686,
         'car': 713211,
         'drivable area': 125723,
         'lane': 528643,
         'person': 91349,
         'bus': 11672,
         'truck': 29971,
         'rider': 4517,
         'bike': 7210,
         'motor': 3002,
         'train': 136})

In [7]:
# Count categories: Validation set
# Flatten the per-image 'labels' lists into one list holding the 'category'
# of every labelled object, in file order.
lst_category_val = [
  label['category']
  for image in data_labels_val
  for label in image['labels']
]

print('\033[1mValidation set\033[0m')
# The last expression is displayed by the notebook: occurrences per category.
Counter(lst_category_val)

[1mValidation set[0m


Counter({'traffic sign': 34908,
         'traffic light': 26885,
         'car': 102506,
         'rider': 649,
         'motor': 452,
         'drivable area': 17981,
         'lane': 75730,
         'person': 13262,
         'bus': 1597,
         'truck': 4245,
         'bike': 1007,
         'train': 15})

In [8]:
# Counts
# One single-column frame per split, indexed by category name.
df_category_train = pd.DataFrame.from_dict(Counter(lst_category_train), orient='index', columns=['Counts'])
df_category_val = pd.DataFrame.from_dict(Counter(lst_category_val), orient='index', columns=['Counts'])

# Side-by-side table of both splits, most frequent training category first.
# Built as one chained expression (no inplace mutation) so that re-running the
# cell always yields the same frame.
df_category = (
  df_category_train
  .merge(df_category_val, left_index=True, right_index=True, how='outer', suffixes=('_train', '_val'))
  .fillna(0)    # a category missing from one split has a count of 0, not NaN
  .astype(int)  # the outer join upcasts to float whenever it introduces NaN
  .sort_values(by='Counts_train', ascending=False)
)
df_category

Unnamed: 0,Counts_train,Counts_val
car,713211,102506
lane,528643,75730
traffic sign,239686,34908
traffic light,186117,26885
drivable area,125723,17981
person,91349,13262
truck,29971,4245
bus,11672,1597
bike,7210,1007
rider,4517,649


### Images with Lanes and Drivable Areas

In [27]:
# List of images with Lanes and Drivable Areas: Training set
# An image qualifies when its labels include at least one 'lane' and at least
# one 'drivable area'; only presence matters, so a set-subset test is enough.
required_categories = {'lane', 'drivable area'}

lst_lane_drivable_train = [
  image['name']
  for image in data_labels_train
  if required_categories <= {label['category'] for label in image['labels']}
]

# The validation cell reuses the name `lst_lane_drivable`, which used to
# discard this result. The split-specific name keeps it available; the
# original name stays bound for any cell that still refers to it.
lst_lane_drivable = lst_lane_drivable_train

print('\033[1mTraining set\033[0m')
len(lst_lane_drivable_train)

[1mTraining set[0m


64123

In [28]:
# List of images with Lanes and Drivable Areas: Validation set
# An image qualifies when its labels include at least one 'lane' and at least
# one 'drivable area'; only presence matters, so a set-subset test is enough.
required_categories = {'lane', 'drivable area'}

lst_lane_drivable_val = [
  image['name']
  for image in data_labels_val
  if required_categories <= {label['category'] for label in image['labels']}
]

# The split-specific name prevents this result and the training one from
# overwriting each other. The original name stays bound (as before, to the
# validation list) for any cell that still refers to it.
lst_lane_drivable = lst_lane_drivable_val

print('\033[1mValidation set\033[0m')
len(lst_lane_drivable_val)

[1mValidation set[0m


9175