# Github Clone

In [None]:
#!git clone https://github.com/facebookresearch/detectron2 detectron2_repo
!pip install -e detectron2_repo
!pip install ultralytics

# Convert YOLO to COCO format

In [None]:
def convert_yolo_to_coco(images_dir, labels_dir, output_json, class_names):
    images = []
    annotations = []
    ann_id = 0

    # Iterate over the label files in the labels directory
    for img_id, label_file in enumerate(os.listdir(labels_dir)):
        if label_file.endswith(".txt"):
            image_id = label_file.replace(".txt", "")
            img_path = os.path.join(images_dir, image_id + ".jpg")

            if not os.path.exists(img_path):
                print(f"Image {img_path} not found, skipping.")
                continue
            try:
                image = Image.open(img_path)
            except:
                continue
                
            width, height = image.size

            images.append({
                "file_name": image_id + ".jpg",
                "height": height,
                "width": width,
                "id": img_id
            })

            with open(os.path.join(labels_dir, label_file)) as f:
                for line in f:
                    class_id, x_center, y_center, bbox_width, bbox_height = map(float, line.split())

                    x_min = (x_center - bbox_width / 2) * width
                    y_min = (y_center - bbox_height / 2) * height
                    w = bbox_width * width
                    h = bbox_height * height

                    annotations.append({
                        "id": ann_id,
                        "image_id": img_id,
                        "category_id": int(class_id),
                        "bbox": [x_min, y_min, w, h],
                        "area": w * h,
                        "iscrowd": 0
                    })
                    ann_id += 1

    categories = [{"id": i, "name": name} for i, name in enumerate(class_names)]

    coco_format = {
        "images": images,
        "annotations": annotations,
        "categories": categories
    }

    with open(output_json, 'w') as outfile:
        json.dump(coco_format, outfile, indent=4)

# Example usage
images_dir = "/kaggle/input/dataset4/train/images"
labels_dir = "/kaggle/input/dataset4/train/labels"
output_json = "/kaggle/working/coco_train_annotations.json"
class_names = ["suv", "car", "truck"]  # Replace with your actual class names

#convert_yolo_to_coco(images_dir, labels_dir, output_json, class_names)


# Detectron 2

## Libraries

In [None]:
# Run after restart session
import os
import cv2
import time
import json
import torch
import shutil
import warnings
import detectron2
import numpy as np
import tracemalloc
from PIL import Image
import matplotlib.pyplot as plt
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.structures import Boxes
from IPython.display import clear_output
from detectron2.utils.logger import setup_logger
from detectron2.data import build_detection_test_loader
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
warnings.filterwarnings("ignore", category=FutureWarning, message=".*torch.load.*weights_only=False.*")
setup_logger()

## Dataset Path

In [None]:
# Register the dataset
train_img_path = '/kaggle/input/dataset4/train/images'
train_json_path = '/kaggle/working/coco_train_annotations.json'
register_coco_instances("my_train_dataset", {}, train_json_path, train_img_path)

val_img_path = '/kaggle/input/dataset4/val/images'
val_json_path = '/kaggle/working/coco_val_annotations.json'
register_coco_instances("my_val_dataset", {}, val_json_path, val_img_path)

test_img_path = '/kaggle/input/dataset4/test/images'
test_json_path = '/kaggle/working/coco_test_annotations.json'
register_coco_instances("my_test_dataset", {}, test_json_path, test_img_path)

train_data = "my_train_dataset"
val_data = "my_val_dataset"
test_data = "my_test_dataset"

## Setup Config

In [None]:
def setup_config(model, output_dir, train_data=train_data, val_data=val_data):
    cfg = get_cfg()

    cfg.merge_from_file(model_zoo.get_config_file(model))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model)
    cfg.DATASETS.TRAIN = (train_data,)
    cfg.DATASETS.TEST = (val_data,)
    cfg.TEST.EVAL_PERIOD = 500
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 16
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3  # Number of classes for Faster RCNN
    cfg.MODEL.RETINANET.NUM_CLASSES = 3  # Number of classes for Retina Net
    cfg.MODEL.MASK_ON = False
    cfg.MODEL.LOAD_PROPOSALS = False

    # Debug/verbose
    setup_logger()
    #logger = logging.getLogger("detectron2")
    #logger.setLevel(logging.DEBUG)
    #logger.info("Training has started.")
    #logger.debug("This is a detailed debug message.")

    # Create the output directory
    cfg.OUTPUT_DIR = "/kaggle/working/" + output_dir
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    return cfg

## Train Detectron 2

In [None]:
def train_models(models, output_dirs):
    for i in range(len(models)):
        model = models[i]
        output_dir = output_dirs[i] 

        cfg = setup_config(model, output_dir)

        # Adaptive LRs
        lrs = [0.0001,0.00001]
        epochs = [500, 1500]

        # Model Training
        resume_training = False
        for lr, epoch in zip(lrs, epochs):
            clear_output()

            cfg.SOLVER.BASE_LR = lr  # Learning rate
            cfg.SOLVER.MAX_ITER = epoch  # Adjust the number of iterations/epochs

            # Start training
            trainer = DefaultTrainer(cfg)
            trainer.resume_or_load(resume=resume_training)
            trainer.train()

            if (resume_training == False):
                resume_training = True     # after first loop, training will be resumed from previous state


        cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
        evaluator = COCOEvaluator(test_data, cfg, False, output_dir=cfg.OUTPUT_DIR)
        test_loader = build_detection_test_loader(cfg, test_data)
        eval_results = inference_on_dataset(trainer.model, test_loader, evaluator)

        with open(output_dirs[i] + "_results.txt", "w") as f:
            f.write("Evaluation Metrics:\n")
            for metric, value in eval_results.items():
                f.write(f"{metric}: {value}\n")
                
                
# Detection Models
detectron_models = [
    "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml", 
    "COCO-Detection/retinanet_R_50_FPN_1x.yaml",
    "Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml"
]
detectron_output_dirs = ["output_faster_rcnn", "output_retinanet", "output_cascade_rcnn"]
                
#train_models(detectron_models, detectron_output_dirs)

# Train Yolov5

In [None]:
!pip install ultralytics
from ultralytics import YOLO
import os
os.environ['WANDB_DISABLED'] = 'true'

# Train
#model = YOLO('/kaggle/working/output_yolo/train/weights/best.pt')  # Load a pre-trained YOLOv5 model
#model.train(data='/kaggle/input/dataset4/train_data.yaml', epochs=80, batch=16, imgsz=640, plots=True,project='output_yolo', lr0=0.0001, cos_lr=True, lrf=0.01)  # Train the model

# Validation
#model = YOLO('/kaggle/working/output_yolo/train2/weights/best.pt')  # Load a pre-trained YOLOv5 model
#model.val(data='/kaggle/input/dataset4/train_data.yaml', plots=True, project='output_yolo_test')  # Validate the model

# Test
#model = YOLO('/kaggle/working/output_yolo/train/weights/best.pt')  # Load a pre-trained YOLOv5 model
#model.val(data='/kaggle/input/dataset4/test_data.yaml', plots=True, project='output_yolo_test')  # Test the model

# Performance Analysis

### Tool Functions

In [None]:
def draw_bounding_boxes(image, boxes, classes, gt, plot=False):
    # Draw ground truth bounding boxes
    if gt:
        color = (0, 0, 255)
    else:
        color = (0, 255, 0)

    item_no=1
    for box,clas in zip(boxes, classes):
        x_min, y_min, x_max, y_max = map(int, box)
        label = f'{item_no}__{clas}'
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 1)  # Blue for ground truth
        cv2.putText(image, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

        item_no +=1
        
    if(plot==True):
        plt.figure(figsize=(12, 8))
        plt.imshow(image)
        plt.title("Predicted Bounding Boxes and Classes")
        plt.axis('off')  # Hide axis
        plt.show()

    return image


def xywhn_to_xyxy(cx, cy, w, h, image_width, image_height):
    """
    Convert YOLO format [cx, cy, w, h] with normalized values to [x_min, y_min, x_max, y_max] in pixel values.

    Parameters:
    - cx, cy: Center coordinates (normalized)
    - w, h: Width and height (normalized)
    - image_width: Width of the image in pixels
    - image_height: Height of the image in pixels

    Returns:
    - A list [x_min, y_min, x_max, y_max] in pixel values
    """
    x_min = (cx - w / 2) * image_width
    y_min = (cy - h / 2) * image_height
    x_max = (cx + w / 2) * image_width
    y_max = (cy + h / 2) * image_height
    return [x_min, y_min, x_max, y_max]

### Load Models

In [None]:
def load_yolo(image, weight, threshold):
    start_time = time.time()
    tracemalloc.start()
    #-----------------------
    model = YOLO(weight)
    results = model.predict(image, conf=threshold)
    pred_boxes = results[0].boxes.xyxy.cpu().numpy()  # Predicted bounding boxes
    pred_classes = results[0].boxes.data[:, 5].cpu().numpy()  # Predicted class IDs
    pred_scores = results[0].boxes.data[:, 4].cpu().numpy()  # Confidence scores
    #------------------------
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    end_time = time.time()
    execution_time = end_time - start_time
    execution_time = np.round(execution_time,2)
    return pred_boxes, pred_classes, pred_scores, execution_time



def load_detectron2(image, model, threshold, all_models=detectron_models, all_dirs=detectron_output_dirs):
    start_time = time.time()
    tracemalloc.start()
    #------------------------
    model_index = all_models.index(model)
    output_dir = all_dirs[model_index]
    cfg = setup_config(model, output_dir)
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    
    # Step 2: Create a Predictor
    predictor = DefaultPredictor(cfg)
    outputs = predictor(image)
    
    custom_threshold = threshold
    instances = outputs["instances"]

    # Extract the bounding boxes, class labels, and scores
    boxes = instances.pred_boxes if instances.has("pred_boxes") else None
    scores = instances.scores if instances.has("scores") else None
    classes = instances.pred_classes if instances.has("pred_classes") else None

    # Filter predictions based on the custom threshold
    filtered_indices = scores > custom_threshold
    filtered_boxes = boxes[filtered_indices]
    filtered_scores = scores[filtered_indices]
    filtered_classes = classes[filtered_indices]

    pred_boxes = filtered_boxes.tensor.to('cpu').tolist()
    pred_scores = filtered_scores.to('cpu').tolist()
    pred_classes = filtered_classes.to('cpu').tolist()
    #-------------------------------------------------
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    end_time = time.time()
    execution_time = end_time - start_time
    execution_time = np.round(execution_time,2)
    
    return pred_boxes, pred_classes, pred_scores, execution_time

### Calculate IOU

In [None]:
def calculate_iou(box1, box2):
    """
    Calculate Intersection over Union (IoU) between two bounding boxes.
    box1, box2 format: [x_min, y_min, x_max, y_max]
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # Calculate the area of the intersection
    intersection = max(0, x2 - x1) * max(0, y2 - y1)

    # Calculate the areas of each box
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Calculate the union area
    union = box1_area + box2_area - intersection

    # Calculate IoU
    iou = intersection / union if union > 0 else 0
    return iou

In [None]:
def performance_metrics(test_folder, model_name, model, threshold):
    data = []
    execution_times = []
    target_found_list = []
    class_identified_list = []
    multiclass_error_list = []
    
    img_list = os.listdir(test_folder+'images/')
    img_list.sort()
    for img in img_list:
        # Load image
        image_path = test_folder + '/images/'+ img
        image = cv2.imread(image_path)

        
        # Extract predictions
        if model_name=='yolo':
            pred_boxes, pred_classes, pred_scores, execution_time = load_yolo(image, model, threshold)
        elif model_name in ["faster_rcnn", "retinanet", "cascade_rcnn"]:
            pred_boxes, pred_classes, pred_scores, execution_time = load_detectron2(image, model, threshold)
        else:
            print('Model Doesnt Exists')
           
        # Extract ground truth
        label_path =test_folder + 'labels/' + img[:-4] + '.txt'
        with open(label_path, 'r') as f:
            lines = f.readlines()
            
        try:
            target_gt = lines[0].split()
        except:
            continue
            
        target_gt = [float(x) for x in target_gt]  # convert string to float
        target_box = xywhn_to_xyxy(target_gt[1], target_gt[2], target_gt[3], target_gt[4], image_width=image.shape[1], image_height=image.shape[0])
        target_class = 1  # in the dataset our target is only car. Only target object is annotated in the tracking dataset

        
        
        # Compare each predicted box with ground truth boxes
        bbox_matched_list = []
        for i, pred_box in enumerate(pred_boxes):
            iou = calculate_iou(pred_box, target_box)
            #print(f'Predicted class: {pred_classes[i]}, IoU with ground truth: {iou:.2f}, Confidence: {pred_scores[i]:.2f}')
            if iou > 0.8:
                bbox_matched_list.append(pred_classes[i])
            
            
            
        # Calculate performance metrics
        target_found = 1 if len(bbox_matched_list) > 0 else 0.0
        class_identified = 1 if target_class in bbox_matched_list else 0.0

        if target_found and class_identified:
            multiclass_error = (len(bbox_matched_list) - 1) / (len(pred_boxes) - 1) if len(pred_boxes) > 1 else 0.0
        else:
            multiclass_error = 0.0
            
        # Log metrics per image
        log = f'\n{img[:-4]} \t Target found: {target_found} \t Class identified: {class_identified} \t Multiclass error: {np.round(multiclass_error, 2)}'
        clear_output()
        print('\n Running Model: ', model_name, '\t', 'Threshold: ', threshold)
        print(log)
        
        data.append(log)
        target_found_list.append(target_found)
        class_identified_list.append(class_identified)
        multiclass_error_list.append(np.round(multiclass_error, 2))
        execution_times.append(execution_time)

    target_found_acc = np.mean(target_found_list)
    class_identified_acc = np.mean(class_identified_list)
    multiclass_error_avg = np.round(np.mean(multiclass_error_list), 2)
    execution_times_avg =  np.round(np.mean(execution_times), 2)
    
    log = f'\n\n\nTarget found accuracy: {target_found_acc} \t Class identified accuracy: {class_identified_acc} \t Multiclass error: {multiclass_error_avg} \t Average Execution Time: {execution_times_avg}s'
    print(log)
    data.append(log)

    # Save results to a text file
    txt_folder = '/kaggle/working/txt_results/'
    with open(txt_folder + f'{model_name}_threshold_{threshold}_results.txt', 'w') as f:
      for log in data:
        f.write(log)
  

In [None]:
# Load ground truths
test_folder = '/kaggle/input/tracking-ground-truth/test/'
thresholds = [0.5, 0.3, 0.1, 1e-5]
model_names = ["faster_rcnn", "retinanet", "cascade_rcnn", 'yolo']

model_wgts = detectron_models.copy()
model_wgts.append('/kaggle/working/output_yolo/train2/weights/best.pt')

for model_name, model in zip(model_names, model_wgts):
    for threshold in thresholds:
        performance_metrics(test_folder, model_name, model, threshold)

# Metrics on Test Datasets

In [35]:
# Make Zip to download all results
import shutil
import os
import pandas as pd

txt_folder = '/kaggle/working/txt_results/'
columns=['Model_name', 'Avg_target_found_acc', 'Avg_class_identified_acc', 'Avg_multi_class_error', 'Avg_execution_time']

data_row = []
thresholds = [0.5, 0.3, 0.1, 1e-5]
model_names = ["faster_rcnn", "retinanet", "cascade_rcnn", 'yolo']

for model in model_names:
    for threshold in thresholds:
        file = f'{model}_threshold_{threshold}_results.txt'

        with open(txt_folder + file, 'r') as f:
            lines = f.readlines()

        values = []
        values.append(f'{model}_{threshold}')
        split_line = re.split(r'[\t\s]+|s', lines[-1])
        split_line = [x for x in split_line if x]

        for data in split_line:
            try:
                values.append(float(data))
            except:
                pass
        
        data_row.append(values)

df = pd.DataFrame(data_row, columns=columns)   
df.to_csv(txt_folder + 'All_detections_output.csv', index=False)
df

Unnamed: 0,Model_name,Avg_target_found_acc,Avg_class_identified_acc,Avg_multi_class_error,Avg_execution_time
0,faster_rcnn_0.5,0.84861,0.787848,0.12,1.25
1,faster_rcnn_0.3,0.886715,0.883625,0.2,1.24
2,faster_rcnn_0.1,0.911432,0.899073,0.15,1.23
3,faster_rcnn_1e-05,0.915551,0.899073,0.12,1.22
4,retinanet_0.5,0.819773,0.819773,0.07,1.09
5,retinanet_0.3,0.859938,0.857878,0.34,1.09
6,retinanet_0.1,0.881565,0.875386,0.13,1.09
7,retinanet_1e-05,0.881565,0.875386,0.03,1.1
8,cascade_rcnn_0.5,0.76828,0.76828,0.0,1.83
9,cascade_rcnn_0.3,0.908342,0.907312,0.25,1.87


# Convert CSV for Statical Analysis

In [38]:
df_combined_statics = df.copy()

selected_column = ['Avg_target_found_acc', 'Avg_class_identified_acc']

for column in selected_column:
    df_combined_statics[column] = df_combined_statics[column] * -1

#df_combined_statics.to_csv
df_combined_statics_transposed = df_combined_statics.copy().T
df_combined_statics_transposed = df_combined_statics_transposed.reset_index()
df_combined_statics_transposed.to_csv(txt_folder + '/All_detections_output_statics.csv', index=False, sep=',')
df_combined_statics_transposed

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,Model_name,faster_rcnn_0.5,faster_rcnn_0.3,faster_rcnn_0.1,faster_rcnn_1e-05,retinanet_0.5,retinanet_0.3,retinanet_0.1,retinanet_1e-05,cascade_rcnn_0.5,cascade_rcnn_0.3,cascade_rcnn_0.1,cascade_rcnn_1e-05,yolo_0.5,yolo_0.3,yolo_0.1,yolo_1e-05
1,Avg_target_found_acc,-0.84861,-0.886715,-0.911432,-0.915551,-0.819773,-0.859938,-0.881565,-0.881565,-0.76828,-0.908342,-0.916581,-0.918641,-0.430484,-0.490216,-0.546859,-0.625129
2,Avg_class_identified_acc,-0.787848,-0.883625,-0.899073,-0.899073,-0.819773,-0.857878,-0.875386,-0.875386,-0.76828,-0.907312,-0.915551,-0.915551,-0.430484,-0.489186,-0.543769,-0.604531
3,Avg_multi_class_error,0.12,0.2,0.15,0.12,0.07,0.34,0.13,0.03,0.0,0.25,0.19,0.18,0.0,0.0,0.0,0.0
4,Avg_execution_time,1.25,1.24,1.23,1.22,1.09,1.09,1.09,1.1,1.83,1.87,1.87,1.87,0.49,0.49,0.49,0.48


In [None]:
# Make Zip to download all results

OUTPUT_NAME = f'/kaggle/working/Tracker_Output'
DIRECTORY_TO_ZIP = f'/kaggle/working/Output'

print(DIRECTORY_TO_ZIP)
shutil.make_archive(OUTPUT_NAME, 'zip', DIRECTORY_TO_ZIP)