In [17]:
import os
import numpy as np
from collections import defaultdict
import pandas as pd
from tqdm import tqdm

In [22]:
# Class-ID -> class-name lookup. The ID of each name is simply its position in
# the list below (0-based), so the list order must not be changed.
# NOTE(review): names are reproduced verbatim, including spellings such as
# 'Traffic-sings' and 'mand_straigh_left' -- presumably they mirror the
# dataset's own class list; confirm before "correcting" them.
yolo_part_mapping = dict(enumerate([
    # 0
    'Traffic-sings',
    # 1-21: names prefixed "forb_"
    'forb_ahead',
    'forb_left',
    'forb_overtake',
    'forb_right',
    'forb_speed_over_10',
    'forb_speed_over_100',
    'forb_speed_over_130',
    'forb_speed_over_20',
    'forb_speed_over_30',
    'forb_speed_over_40',
    'forb_speed_over_5',
    'forb_speed_over_50',
    'forb_speed_over_60',
    'forb_speed_over_70',
    'forb_speed_over_80',
    'forb_speed_over_90',
    'forb_stopping',
    'forb_trucks',
    'forb_u_turn',
    'forb_weight_over_3.5t',
    'forb_weight_over_7.5t',
    # 22-27: names prefixed "info_"
    'info_bus_station',
    'info_crosswalk',
    'info_highway',
    'info_one_way_traffic',
    'info_parking',
    'info_taxi_parking',
    # 28-38: names prefixed "mand_"
    'mand_bike_lane',
    'mand_left',
    'mand_left_right',
    'mand_pass_left',
    'mand_pass_left_right',
    'mand_pass_right',
    'mand_right',
    'mand_roundabout',
    'mand_straigh_left',
    'mand_straight',
    'mand_straight_right',
    # 39-41: names prefixed "prio_"
    'prio_give_way',
    'prio_priority_road',
    'prio_stop',
    # 42-55: names prefixed "warn_"
    'warn_children',
    'warn_construction',
    'warn_crosswalk',
    'warn_cyclists',
    'warn_domestic_animals',
    'warn_other_dangers',
    'warn_poor_road_surface',
    'warn_roundabout',
    'warn_slippery_road',
    'warn_speed_bumper',
    'warn_traffic_light',
    'warn_tram',
    'warn_two_way_traffic',
    'warn_wild_animals',
]))

In [16]:
def load_labels(folder):
    """Read every ``.txt`` label file located directly inside ``folder``.

    Each non-blank line of a file is split on whitespace and every token is
    converted to ``float``; the resulting list is one label row. Blank or
    whitespace-only lines are skipped so that no empty row is ever produced
    (an empty row would make consumers that read ``row[0]`` fail).

    Args:
        folder: Path of the directory to scan. Sub-directories are not
            descended into, and entries not ending in ``.txt`` are ignored.

    Returns:
        dict mapping each label file's name (not its full path) to a list of
        rows, each row being a list of floats. A file without any label line
        maps to an empty list.

    Raises:
        ValueError: If a token in a label file cannot be parsed as a float.
        OSError: Propagated from ``os.listdir`` / ``open`` (for example when
            ``folder`` does not exist).
    """
    labels = {}
    for filename in tqdm(os.listdir(folder)):
        if not filename.endswith('.txt'):
            continue
        filepath = os.path.join(folder, filename)
        with open(filepath, 'r') as f:
            # Iterate the file lazily; `line.split()` already ignores leading
            # and trailing whitespace, and `line.strip()` filters blank lines.
            labels[filename] = [
                [float(token) for token in line.split()]
                for line in f
                if line.strip()
            ]
    return labels

In [18]:
# Small sample folder of label files, loaded first to check the helpers
# before pointing them at the full splits.
sample_folder = "D:/Projects/Interview/phonologies/assignment/data/sample_data/labels"
sample_labels = load_labels(folder=sample_folder)

100%|██████████| 3/3 [00:00<00:00, 1417.79it/s]


In [19]:
# Inspect the parsed sample: {label filename: [row, ...]}, each row a list of floats
# whose first value is later read as the class ID (remaining values are presumably
# the box geometry -- confirm against the dataset's label format).
sample_labels

{'SNAGDEEA-0243_png_jpg.rf.3bc4ff5873d190933b14b754609d68a0.txt': [[39.0,
   0.584375,
   0.73984375,
   0.415625,
   0.4046875],
  [23.0, 0.5765625, 0.30625, 0.425, 0.465625]],
 'SNAGDEEA-0244_png_jpg.rf.b5851556c58d4e12ac9180402514ca0f.txt': [[40.0,
   0.74765625,
   0.49140625,
   0.2953125,
   0.3078125],
  [23.0, 0.46640625, 0.42890625, 0.0734375, 0.0828125]],
 'SNAGDEEA-0245_png_jpg.rf.3a53daeaa83274ca4661839d137d91c3.txt': [[40.0,
   0.52578125,
   0.4453125,
   0.4515625,
   0.459375],
  [23.0, 0.10234375, 0.35546875, 0.1078125, 0.1234375]]}

In [20]:
def count_gt_labels_per_class(gt_labels):
    """Tally how many label rows each class ID has across all images.

    Args:
        gt_labels: Mapping of image/label identifier -> list of label rows.
            Only the first element of every row is used; it is truncated to
            ``int`` and treated as the class ID. The identifiers themselves
            are ignored.

    Returns:
        ``defaultdict(int)`` mapping class ID -> number of rows with that ID,
        with keys in order of first appearance.
    """
    tally = defaultdict(int)

    # Flatten "per image -> per row" into one stream of integer class IDs.
    class_ids = (
        int(row[0])
        for rows in gt_labels.values()
        for row in rows
    )
    for class_id in class_ids:
        tally[class_id] += 1

    return tally

# Per-class row counts for the sample labels loaded above.
class_counts = count_gt_labels_per_class(sample_labels)

In [21]:
# Display the sample tally (class ID -> number of label rows).
class_counts

defaultdict(int, {39: 1, 23: 3, 40: 2})

In [23]:
# Turn the tally into a table: one row per class ID with its count and the
# class name looked up in yolo_part_mapping (IDs missing from the mapping get NaN).
class_counts_df = (
    pd.DataFrame.from_dict(class_counts, orient='index', columns=['count'])
    .rename_axis('class_id')  # name the index so reset_index() emits a 'class_id' column
    .reset_index()
    .assign(class_label=lambda frame: frame['class_id'].map(yolo_part_mapping))
)
class_counts_df

Unnamed: 0,class_id,count,class_label
0,39,1,prio_give_way
1,23,3,info_crosswalk
2,40,2,prio_priority_road


In [24]:
train_data = "D:/Projects/Interview/phonologies/assignment/data/YOLO_Data/train/labels"

# Parse every label file of the training split, then tally rows per class ID.
train_labels = load_labels(train_data)
train_class_count = count_gt_labels_per_class(train_labels)

# One row per class ID: count plus the name looked up in yolo_part_mapping.
train_class_count_df = (
    pd.DataFrame.from_dict(train_class_count, orient='index', columns=['count'])
    .rename_axis('class_id')  # name the index so reset_index() emits a 'class_id' column
    .reset_index()
    .assign(class_label=lambda frame: frame['class_id'].map(yolo_part_mapping))
)
train_class_count_df

100%|██████████| 3253/3253 [00:00<00:00, 4789.05it/s]


Unnamed: 0,class_id,count,class_label
0,4,146,forb_right
1,40,91,prio_priority_road
2,23,659,info_crosswalk
3,20,131,forb_weight_over_3.5t
4,25,213,info_one_way_traffic
5,26,206,info_parking
6,45,60,warn_cyclists
7,50,62,warn_slippery_road
8,1,169,forb_ahead
9,17,253,forb_stopping


In [25]:
test_data = "D:/Projects/Interview/phonologies/assignment/data/YOLO_Data/test/labels"

# Parse every label file of the test split, then tally rows per class ID.
test_labels = load_labels(test_data)
test_class_count = count_gt_labels_per_class(test_labels)

# One row per class ID: count plus the name looked up in yolo_part_mapping.
test_class_count_df = (
    pd.DataFrame.from_dict(test_class_count, orient='index', columns=['count'])
    .rename_axis('class_id')  # name the index so reset_index() emits a 'class_id' column
    .reset_index()
    .assign(class_label=lambda frame: frame['class_id'].map(yolo_part_mapping))
)
test_class_count_df

100%|██████████| 628/628 [00:03<00:00, 207.18it/s]


Unnamed: 0,class_id,count,class_label
0,37,10,mand_straight
1,26,48,info_parking
2,36,6,mand_straigh_left
3,23,117,info_crosswalk
4,17,47,forb_stopping
5,25,36,info_one_way_traffic
6,45,15,warn_cyclists
7,1,13,forb_ahead
8,4,17,forb_right
9,39,83,prio_give_way


In [26]:
val_data = "D:/Projects/Interview/phonologies/assignment/data/YOLO_Data/val/labels"

# Parse every label file of the validation split, then tally rows per class ID.
val_labels = load_labels(val_data)
val_class_count = count_gt_labels_per_class(val_labels)

# One row per class ID: count plus the name looked up in yolo_part_mapping.
val_class_count_df = (
    pd.DataFrame.from_dict(val_class_count, orient='index', columns=['count'])
    .rename_axis('class_id')  # name the index so reset_index() emits a 'class_id' column
    .reset_index()
    .assign(class_label=lambda frame: frame['class_id'].map(yolo_part_mapping))
)
val_class_count_df

100%|██████████| 500/500 [00:00<00:00, 1214.71it/s]


Unnamed: 0,class_id,count,class_label
0,35,70,mand_roundabout
1,17,126,forb_stopping
2,1,47,forb_ahead
3,39,148,prio_give_way
4,32,22,mand_pass_left_right
5,41,66,prio_stop
6,23,80,info_crosswalk
7,29,10,mand_left
8,34,29,mand_right
9,26,14,info_parking


In [28]:
# Number of entries in train_labels, i.e. how many label files were loaded for the training split.
len(train_labels)

3253

In [29]:
# Number of entries in test_labels, i.e. how many label files were loaded for the test split.
len(test_labels)

628

In [30]:
# Number of entries in val_labels, i.e. how many label files were loaded for the validation split.
len(val_labels)

500