In [None]:
!pip install ultralytics

# Data processing

In [None]:
import json
import os
from glob import glob

# Root of the UA-DETRAC upload inside the Kaggle input mount; images and labels
# sit in parallel "images/<split>" and "labels/<split>" folders below it.
_detrac_root = "/kaggle/input/ua-detrac-dataset/content/UA-DETRAC/DETRAC_Upload"
_split_folders = {"TRAIN": "train", "VAL": "val"}

# Split name -> directory that holds the images of that split.
image_dirs = {
    split: f"{_detrac_root}/images/{folder}"
    for split, folder in _split_folders.items()
}

# Split name -> directory that holds the label files of that split.
label_dirs = {
    split: f"{_detrac_root}/labels/{folder}"
    for split, folder in _split_folders.items()
}

In [None]:
def get_stat(name):
    """Print how many images of a dataset split have a label file.

    Images (``*.jpg``) and labels (``*.txt``) are collected recursively and
    matched by base file name only, so the sub-folder they sit in is ignored.

    Args:
        name: Key into the module-level ``image_dirs`` / ``label_dirs`` dicts.

    Raises:
        KeyError: If ``name`` is not a key of ``image_dirs`` / ``label_dirs``.
    """
    image_dir = image_dirs[name]
    label_dir = label_dirs[name]

    image_paths = glob(os.path.join(image_dir, "**", "*.jpg"), recursive=True)
    all_images = {os.path.basename(path) for path in image_paths}

    label_paths = glob(os.path.join(label_dir, "**", "*.txt"), recursive=True)
    # Swap only the extension: str.replace would also rewrite a ".txt" that
    # happens to occur in the middle of a file name.
    all_annotated_images = {
        os.path.splitext(os.path.basename(path))[0] + ".jpg"
        for path in label_paths
    }

    print(f"{name}:")
    print(f"Всього зображень:       {len(all_images)}")
    print(f"Анотовані зображення:   {len(all_images & all_annotated_images)}")
    print(f"Неанотовані зображення: {len(all_images - all_annotated_images)}\n")

In [None]:
# Report annotation coverage for the training split first, then validation.
for split_name in ("TRAIN", "VAL"):
    get_stat(split_name)

## Adding a chosen number of instances of the target class

In [None]:
import os
import shutil
from collections import defaultdict
from tqdm import tqdm

# Destination: the working subset that gets topped up.
current_images_dir = "/kaggle/working/dataset_subset/images/train"
current_labels_dir = "/kaggle/working/dataset_subset/labels/train"
# Source: the full UA-DETRAC training split.
original_images_dir = "/kaggle/input/ua-detrac-dataset/content/UA-DETRAC/DETRAC_Upload/images/train"
original_labels_dir = "/kaggle/input/ua-detrac-dataset/content/UA-DETRAC/DETRAC_Upload/labels/train"

# Class id (first column of a label line) whose instances should be added.
target_class = 2

# Stop once at least this many new instances of `target_class` were copied.
instances_to_add = 500

# Create the destination folders so the cell does not fail on a fresh kernel.
# NOTE(review): this cell tops up an existing subset, but the cells that copy
# that subset into the working directory come later in the notebook — confirm
# the intended execution order.
os.makedirs(current_images_dir, exist_ok=True)
os.makedirs(current_labels_dir, exist_ok=True)

existing_images = set(os.listdir(current_images_dir))
added_instances = 0
added_files = set()

# os.listdir returns entries in arbitrary order; sorting makes the selection
# reproducible. Only label files are considered.
candidate_labels = sorted(
    entry for entry in os.listdir(original_labels_dir) if entry.endswith(".txt")
)

for label_file in tqdm(candidate_labels, desc=f"Adding class {target_class} instances"):
    if added_instances >= instances_to_add:
        break

    label_path = os.path.join(original_labels_dir, label_file)
    with open(label_path, "r") as f:
        lines = f.readlines()

    # Compare the first whitespace-separated token so that leading spaces or
    # tab separators do not hide an instance.
    instances_in_file = sum(
        1 for line in lines if line.split()[:1] == [str(target_class)]
    )
    if instances_in_file == 0:
        continue

    image_file = os.path.splitext(label_file)[0] + ".jpg"
    if image_file in existing_images:
        # Already part of the subset: nothing new to add.
        continue

    src_image_path = os.path.join(original_images_dir, image_file)
    if not os.path.isfile(src_image_path):
        # A label without its image cannot be used for training; skip it
        # instead of aborting the whole loop.
        continue

    shutil.copy(src_image_path, os.path.join(current_images_dir, image_file))
    shutil.copy(label_path, os.path.join(current_labels_dir, label_file))
    added_instances += instances_in_file
    added_files.add(label_file)

print(f"Додано {added_instances} інстансів класу {target_class} з {len(added_files)} нових зображень.")


## Copying the created subset from Kaggle input into the working directory

In [None]:
import shutil
import os

# Source: the previously exported training subset, attached as a Kaggle input.
train_images_dir = "/kaggle/input/subset-ua-detrac/train_img/kaggle/working/dataset_subset/images/train"
train_labels_dir = "/kaggle/input/subset-ua-detrac/labels/kaggle/working/dataset_subset/labels/train"
# Destination: the writable working copy used for training.
subset_images_dir = "/kaggle/working/dataset_subset/images/train"
subset_labels_dir = "/kaggle/working/dataset_subset/labels/train"

for target_dir in (subset_images_dir, subset_labels_dir):
    os.makedirs(target_dir, exist_ok=True)

image_files = sorted(os.listdir(train_images_dir))

# The full listing is taken; lower `subset_size` to copy only a prefix of it.
subset_size = len(image_files)
subset_files = image_files[:subset_size]

for image_name in subset_files:
    shutil.copyfile(
        os.path.join(train_images_dir, image_name),
        os.path.join(subset_images_dir, image_name),
    )

    # The label shares the image's name with ".jpg" swapped for ".txt".
    # Images that have no label file are still copied.
    label_name = image_name.replace(".jpg", ".txt")
    label_source = os.path.join(train_labels_dir, label_name)
    if not os.path.exists(label_source):
        continue
    shutil.copyfile(label_source, os.path.join(subset_labels_dir, label_name))

print("К-сть зображень:", len(os.listdir(subset_images_dir)))
print("К-сть анотацій:", len(os.listdir(subset_labels_dir)))

In [None]:
import shutil
import os

# Source: the previously exported validation subset, attached as a Kaggle input.
# NOTE: the variables keep their `train_*` names, but the paths point at "val".
train_images_dir = "/kaggle/input/subset-ua-detrac/val_v2/kaggle/working/dataset_subset/images/val"
train_labels_dir = "/kaggle/input/subset-ua-detrac/val_v2/kaggle/working/dataset_subset/labels/val"
# Destination: the writable working copy used for validation.
subset_images_dir = "/kaggle/working/dataset_subset/images/val"
subset_labels_dir = "/kaggle/working/dataset_subset/labels/val"

for target_dir in (subset_images_dir, subset_labels_dir):
    os.makedirs(target_dir, exist_ok=True)

image_files = sorted(os.listdir(train_images_dir))

# The full listing is taken; lower `subset_size` to copy only a prefix of it.
subset_size = len(image_files)
subset_files = image_files[:subset_size]

for image_name in subset_files:
    image_source = os.path.join(train_images_dir, image_name)
    image_target = os.path.join(subset_images_dir, image_name)
    shutil.copyfile(image_source, image_target)

    # Copy the matching label only when one exists for this image.
    label_name = image_name.replace(".jpg", ".txt")
    label_source = os.path.join(train_labels_dir, label_name)
    label_target = os.path.join(subset_labels_dir, label_name)
    if os.path.exists(label_source):
        shutil.copyfile(label_source, label_target)

print("К-сть зображень:", len(os.listdir(subset_images_dir)))
print("К-сть анотацій:", len(os.listdir(subset_labels_dir)))

In [None]:
import os
from collections import defaultdict

def count_class_instances(label_dir, num_classes=4):
    """Print the number of entries in ``label_dir`` and per-class label counts.

    Every ``.txt`` file in the directory is read line by line; the first
    whitespace-separated token of each non-empty line is taken as the class id.

    Args:
        label_dir: Directory containing the label files.
        num_classes: Counts are printed for class ids ``0 .. num_classes - 1``;
            ids outside that range are tallied but not printed.
    """
    per_class = defaultdict(int)
    entries = os.listdir(label_dir)

    for entry in entries:
        if not entry.endswith(".txt"):
            continue
        with open(os.path.join(label_dir, entry), "r") as handle:
            for raw_line in handle.readlines():
                fields = raw_line.strip().split()
                if not fields:
                    continue
                per_class[int(fields[0])] += 1

    # The file total covers every directory entry, not only the .txt files.
    print("К-сть файлів: ", len(entries))

    for class_id in range(num_classes):
        print(f"Клас {class_id}: {per_class[class_id]} інстансів")

# Class distribution of the training labels in the working subset.
label_path = "/kaggle/working/dataset_subset/labels/train"
count_class_instances(label_dir=label_path, num_classes=4)

In [None]:
import os
from collections import defaultdict

# `count_class_instances` is already defined in the previous cell; an identical
# second definition only shadowed it, so the existing function is reused here.
# Class distribution of the validation labels in the working subset.
label_path = "/kaggle/working/dataset_subset/labels/val"

count_class_instances(label_path, num_classes=4)

# Preview

In [None]:
import json
import cv2
import matplotlib.pyplot as plt
import random
import os

# Class display name -> color used when drawing on the RGB preview image.
# The insertion order doubles as the class-id mapping: id 0 is the first key,
# id 1 the second, and so on.
category_colors = {
    "truck (others)": (0, 0, 255),
    "car": (255, 255, 0),
    "van": (0, 255, 255),
    "bus": (255, 0, 255)
}

def load_annotations(label_file):
    """Read a label file into a list of ``(bbox, class_name)`` tuples.

    Each non-empty line is expected to hold a class id followed by the box
    values; the values after the id are returned as a list of floats.

    Args:
        label_file: Path to the ``.txt`` label file.

    Returns:
        One ``(bbox, class_name)`` tuple per non-empty line, in file order.
        An empty list when the file does not exist (image without labels).
        Class ids with no entry in ``category_colors`` are named
        ``"class <id>"``.

    Raises:
        ValueError: If a class id or box value cannot be parsed as a number.
    """
    # The copy cells keep images that have no label file, so a missing file
    # means "no boxes" rather than an error.
    if not os.path.exists(label_file):
        return []

    class_names = list(category_colors)
    annotations = []
    with open(label_file, "r") as f:
        for line in f:
            parts = line.strip().split()
            if not parts:
                # Blank (often trailing) lines carry no annotation.
                continue
            class_id = int(parts[0])
            bbox = list(map(float, parts[1:]))
            if 0 <= class_id < len(class_names):
                class_name = class_names[class_id]
            else:
                # Unknown ids get a placeholder name instead of raising or
                # (for negative ids) silently indexing from the end.
                class_name = f"class {class_id}"
            annotations.append((bbox, class_name))
    return annotations

def plot_bounding_boxes(image_path, annotations):
    """Draw captioned bounding boxes on an image and show it with matplotlib.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.
        annotations: Iterable of ``(bbox, class_name)`` pairs, where ``bbox``
            is ``(x_center, y_center, box_width, box_height)`` given as
            fractions of the image width and height.

    Raises:
        OSError: If the image cannot be loaded.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None; fail with a clear
        # message instead of a cryptic cvtColor error.
        raise OSError(f"Could not load image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    height, width = image.shape[:2]
    thickness = 2

    for bbox, class_name in annotations:
        x_center, y_center, box_width, box_height = bbox
        # Convert the relative center/size form to absolute pixel corners.
        x_min = int((x_center - box_width / 2) * width)
        y_min = int((y_center - box_height / 2) * height)
        x_max = int((x_center + box_width / 2) * width)
        y_max = int((y_center + box_height / 2) * height)

        # Classes without a configured color are drawn in white.
        color = category_colors.get(class_name, (255, 255, 255))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, thickness)

        # Put the caption just above the box, but keep it inside the frame for
        # boxes that touch the top edge of the image.
        caption_y = max(y_min - 10, 15)
        cv2.putText(
            image, str(class_name), (x_min, caption_y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA
        )

    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.axis("off")
    plt.show()

# Position (in the sorted listing) of the training image to preview.
preview_index = 97

preview_images_dir = "/kaggle/working/dataset_subset/images/train"
preview_labels_dir = "/kaggle/working/dataset_subset/labels/train"

# os.listdir returns entries in arbitrary order; sorting makes the preview
# show the same image on every run.
preview_candidates = sorted(os.listdir(preview_images_dir))
if not preview_candidates:
    raise FileNotFoundError(f"No images found in {preview_images_dir}")

# Clamp the index so a subset with fewer images still yields a preview.
image_name = preview_candidates[min(preview_index, len(preview_candidates) - 1)]
image_path = os.path.join(preview_images_dir, image_name)

# Swap only the extension to get the matching label file name.
label_name = os.path.splitext(image_name)[0] + ".txt"
label_path = os.path.join(preview_labels_dir, label_name)

annotations = load_annotations(label_path)
print(annotations)
plot_bounding_boxes(image_path, annotations)

## Creating the YAML file for YOLO

In [None]:
# Dataset description: image folders of both splits, the number of classes and
# the class-id -> name mapping. Written line by line and joined with newlines;
# the empty first and last items give the leading and trailing newline.
yaml_lines = [
    "",
    "train: /kaggle/working/dataset_subset/images/train",
    "val: /kaggle/working/dataset_subset/images/val",
    "",
    "nc: 4",
    "names:",
    "  0: truck",
    "  1: car",
    "  2: van",
    "  3: bus",
    "",
]
dataset_yaml = "\n".join(yaml_lines)

# Written relative to the current working directory.
with open("dataset.yaml", mode="w") as yaml_file:
    yaml_file.write(dataset_yaml)

# Model initialization

In [None]:
from ultralytics import YOLO

# Create the model from the "yolo11s.yaml" definition file.
# NOTE(review): presumably a ".yaml" definition starts from untrained weights,
# whereas a ".pt" checkpoint would load pretrained ones — confirm that training
# from scratch is intended.
model = YOLO("yolo11s.yaml")

## Model training with custom parameters

In [None]:
# Train on the dataset described by dataset.yaml with custom augmentation and
# optimizer settings.
model.train(data="/kaggle/working/dataset.yaml",
            epochs=50,
            imgsz=640,
            batch=16,
            # NOTE(review): auto_augment is documented as a policy name string
            # for classification; True is kept as-is — confirm it has an effect
            # for detection training.
            auto_augment=True,
            erasing=0.0,
            mosaic=1.0,
            # Integer setting (number of final epochs without mosaic);
            # 0 is the explicit form of the previous `False`.
            close_mosaic=0,
            iou=0.5,
            multi_scale=True,
            optimizer="AdamW",
            lr0=0.001,
            cos_lr=True,
            # Fixed spelling: "momuntum" is not a valid training argument.
            momentum=0.8,
            weight_decay=0.0005,
            mixup=0.2,
            cutmix=0.1,
            dropout=0.15)