In [None]:
# Install the notebook's dependencies with pip (quiet mode).
# transformers and supervision are installed straight from their GitHub
# repositories, and only albumentations carries a version bound, so the exact
# package versions depend on when this cell is run.
!pip install -q git+https://github.com/huggingface/transformers.git
!pip install -q git+https://github.com/roboflow/supervision.git
!pip install -q accelerate
!pip install -q roboflow
!pip install -q torchmetrics
!pip install -q "albumentations>=1.4.5"

In [None]:
import torch
import requests

import numpy as np
import supervision as sv
import albumentations as A

from PIL import Image
from pprint import pprint
from roboflow import Roboflow
from dataclasses import dataclass, replace
from google.colab import userdata
from torch.utils.data import Dataset
from transformers import (
    AutoImageProcessor,
    AutoModelForObjectDetection,
    TrainingArguments,
    Trainer
)
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from transformers import RTDetrImageProcessor, RTDetrForObjectDetection, RTDetrV2ForObjectDetection


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the model checkpoints loaded later in
# this notebook are read from paths under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:

from google.colab import userdata
from roboflow import Roboflow

# Read the Roboflow API key from Colab's secret store instead of embedding it
# in the notebook, so the credential is never saved or shared with the file.
# Requires a Colab secret named ROBOFLOW_API_KEY with notebook access enabled.
rf = Roboflow(api_key=userdata.get("ROBOFLOW_API_KEY"))

# Download version 1 of the project in COCO format; `dataset.location` is the
# local directory the later cells read the train/valid/test splits from.
project = rf.workspace("kuivashev").project("hopefully-final-2zc5r")
version = project.version(1)
dataset = version.download("coco")



In [None]:
def _load_coco(images_dir, annotations_file):
    """Load one COCO-format split as a supervision DetectionDataset."""
    return sv.DetectionDataset.from_coco(
        images_directory_path=images_dir,
        annotations_path=annotations_file,
    )


# Each split lives in its own sub-directory of the downloaded dataset, with the
# COCO annotation file stored next to the images.
ds_train = _load_coco(
    f"{dataset.location}/train",
    f"{dataset.location}/train/_annotations.coco.json",
)
ds_valid = _load_coco(
    f"{dataset.location}/valid",
    f"{dataset.location}/valid/_annotations.coco.json",
)
ds_test = _load_coco(
    f"{dataset.location}/test",
    f"{dataset.location}/test/_annotations.coco.json",
)

# Report the size of every split.
print(f"Number of training images: {len(ds_train)}")
print(f"Number of validation images: {len(ds_valid)}")
print(f"Number of test images: {len(ds_test)}")

In [None]:

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the RT-DETR v2 weights and the matching image processor from local
# checkpoint directories on the mounted Drive (no hub download is attempted).
model = RTDetrV2ForObjectDetection.from_pretrained(
    "/content/drive/MyDrive/models/rt-detr/rt-detr-3005/m",
    local_files_only=True,
)
model = model.to(DEVICE)
processor = RTDetrImageProcessor.from_pretrained(
    "/content/drive/MyDrive/models/rt-detr/rt-detr-3005/p",
    local_files_only=True,
)

In [None]:
# Two-way lookup between class index and class name, following the order of
# the training dataset's class list.
id2label = dict(enumerate(ds_train.classes))
label2id = {label: index for index, label in id2label.items()}


In [None]:
# Install Ultralytics, which provides the YOLO class imported in the next cell.
!pip install ultralytics

In [None]:
from ultralytics import YOLO
# Load the YOLO checkpoint from the mounted Drive; it is the second detector
# used next to RT-DETR in the ensemble evaluation below.
yolo_model = YOLO("/content/drive/MyDrive/models/yolo/yolo_latest/best.pt")


In [None]:

def annotate(image, annotations, classes):
    """Return a copy of ``image`` with boxes and class-name labels drawn on it.

    Args:
        image: Image to draw on; it is copied first, so the input is not modified.
        annotations: Detections to draw; ``annotations.class_id`` is iterated to
            pick one label per detection.
        classes: Sequence (or mapping) indexed by class id that yields the label text.

    Returns:
        The annotated copy of ``image``.
    """
    # Resolve the label text first so a bad class id fails before any drawing.
    class_labels = [classes[class_id] for class_id in annotations.class_id]

    box_drawer = sv.BoxAnnotator()
    text_drawer = sv.LabelAnnotator(text_scale=1, text_thickness=2)

    canvas = image.copy()
    canvas = box_drawer.annotate(canvas, annotations)
    return text_drawer.annotate(canvas, annotations, labels=class_labels)

In [None]:
from re import M
import numpy as np
from sklearn.isotonic import IsotonicRegression

# Per-class trust weights for each detector, keyed by lower-case class name.
# calibrate_confidence() scales a raw confidence by sqrt(weight) and
# ensemble_predictions() uses the weights for its weighted class vote.
#
# Every YOLO weight is currently 0, so YOLO scores are scaled to 0 and carry no
# weight in the vote. Earlier non-zero YOLO values, kept for reference:
#   plastic 0.958 * 0.916, glass 0.932 * 0.961/10, metal 0.977 * 0.973,
#   paper 0.862 * 0.806, organic 0.727 * 0.644
MODEL_WEIGHTS = {
    "YOLOv8x": dict.fromkeys(
        ("plastic", "glass", "metal", "paper", "organic", "garb-garbage"),
        0,
    ),
    "RT-DETR-101": {
        "plastic": 0.8967,
        "glass": 0.8847,
        "metal": 0.8705,
        "paper": 0.7299,
        "organic": 0.6151,
        "garb-garbage": 0,
    },
}

# Disabled alternative weighting scheme (each model weighted by the ratio of
# the other model's error to its own). It is wrapped in a string literal, so it
# is never executed and does not override MODEL_WEIGHTS above.
"""
MODEL_ERRORS = {
    "YOLOv8x":{
        "plastic": 1- 0.935 * 0.849 ,
        "glass": 1- 0.967 *  0.942,
        "metal":1-  0.955 *  0.93  ,
        "paper":1- 0.873 *  0.768,
        "organic": 1- 0.703 *  0.639   ,
    },
        "RT-DETR-101": {
        "plastic": 1- 0.8967,
        "glass": 1- 0.8847,
        "metal": 1- 0.8705,
        "paper": 1- 0.7299 ,
        "organic": 1-  0.6151,
    }
}
MODEL_WEIGHTS = {
    "RT-DETR-101": {
        "plastic":  MODEL_ERRORS["YOLOv8x"]["plastic"] /MODEL_ERRORS["RT-DETR-101"]["plastic"]  ,
        "glass": MODEL_ERRORS["YOLOv8x"]["glass"] /MODEL_ERRORS["RT-DETR-101"]["glass"] ,
        "metal": MODEL_ERRORS["YOLOv8x"]["metal"] /MODEL_ERRORS["RT-DETR-101"]["metal"]   ,
        "paper": MODEL_ERRORS["YOLOv8x"]["paper"] /MODEL_ERRORS["RT-DETR-101"]["paper"] ,
        "organic":MODEL_ERRORS["YOLOv8x"]["organic"] /MODEL_ERRORS["RT-DETR-101"]["organic"]    ,
                "garb-garbage":0,

    },
    "YOLOv8x": {
        "plastic": MODEL_ERRORS["RT-DETR-101"]["plastic"] /MODEL_ERRORS["YOLOv8x"]["plastic"]  ,
        "glass":MODEL_ERRORS["RT-DETR-101"]["glass"] /MODEL_ERRORS["YOLOv8x"]["glass"] ,
        "metal": MODEL_ERRORS["RT-DETR-101"]["metal"] /MODEL_ERRORS["YOLOv8x"]["metal"]   ,
        "paper":MODEL_ERRORS["RT-DETR-101"]["paper"] /MODEL_ERRORS["YOLOv8x"]["paper"] ,
        "garb-garbage":0,# 1,
        "organic": MODEL_ERRORS["RT-DETR-101"]["organic"] /MODEL_ERRORS["YOLOv8x"]["organic"]    ,
    }
}
"""
# Two calibrated confidences further apart than this are treated as a conflict
# by ensemble_predictions().
CONFIDENCE_DELTA_THRESHOLD = 0.5

# Optional fitted calibrators, keyed first by model name and then by class
# name. Both registries start empty, in which case calibrate_confidence()
# falls back to weight-based scaling.
CALIBRATORS = {model_name: {} for model_name in ("YOLOv8x", "RT-DETR-101")}

def calibrate_confidence(confidence, model_name, class_name):
    """Rescale a raw detection confidence for one model and class.

    If a calibrator is registered in ``CALIBRATORS[model_name][class_name]``
    (exact class-name match), its ``predict`` result for the confidence is
    returned. Otherwise the confidence is multiplied by the square root of the
    model's weight for the lower-cased class name in ``MODEL_WEIGHTS``.

    Args:
        confidence: Raw confidence reported by the detector.
        model_name: Key into ``CALIBRATORS`` / ``MODEL_WEIGHTS``.
        class_name: Predicted class label.

    Returns:
        The calibrated confidence.

    Raises:
        KeyError: If no calibrator applies and ``MODEL_WEIGHTS`` has no entry
            for the model or for the class.
    """
    fitted = CALIBRATORS.get(model_name, {})
    if class_name in fitted:
        return fitted[class_name].predict([confidence])[0]

    weight = MODEL_WEIGHTS[model_name].get(class_name.lower())
    if weight is None:
        # Fail with a clear message instead of letting np.sqrt(None) raise an
        # opaque TypeError for a class that was never given a weight.
        raise KeyError(
            f"No weight configured for class {class_name!r} of model {model_name!r}"
        )
    return confidence * np.sqrt(weight)

def calculate_iou(box1, box2):
    """Compute the intersection-over-union of two boxes.

    Each box is indexed as ``[x_min, y_min, x_max, y_max]``.

    Returns:
        ``intersection / union``, or ``0`` when the union area is not positive.
    """
    # Corners of the overlapping rectangle (it may be empty or inverted).
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    # Clamp each side at 0 so disjoint boxes give a zero overlap area.
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap_area = overlap_w * overlap_h

    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = first_area + second_area - overlap_area

    if union_area > 0:
        return overlap_area / union_area
    return 0

def match_detections(yolo_detections, detr_detections, iou_threshold=0.3):
    """Greedily pair YOLO detections with RT-DETR detections by box overlap.

    YOLO detections are visited in order. Each one claims the still-unclaimed
    RT-DETR detection with the highest IoU, provided that IoU is above both
    ``iou_threshold`` and zero; on a tie the earliest RT-DETR detection wins.
    Class labels are not consulted.

    Args:
        yolo_detections: Sequence of dicts with a ``'box'`` entry.
        detr_detections: Sequence of dicts with a ``'box'`` entry.
        iou_threshold: Minimum IoU (exclusive) for a pair to count as a match.

    Returns:
        List of ``(yolo_index, detr_index)`` tuples in YOLO order.
    """
    pairs = []
    claimed = set()

    for y_pos, y_item in enumerate(yolo_detections):
        # Collect (iou, index) for every free RT-DETR box that overlaps enough.
        candidates = []
        for d_pos, d_item in enumerate(detr_detections):
            if d_pos in claimed:
                continue
            overlap = calculate_iou(y_item['box'], d_item['box'])
            if overlap > iou_threshold and overlap > 0:
                candidates.append((overlap, d_pos))

        if not candidates:
            continue

        # max() keeps the first of equally good candidates.
        _, winner = max(candidates, key=lambda candidate: candidate[0])
        pairs.append((y_pos, winner))
        claimed.add(winner)

    return pairs

def ensemble_predictions(yolo_det, detr_det, class_names):
    """Fuse one matched YOLO / RT-DETR detection pair into a single prediction.

    Both confidences are first passed through ``calibrate_confidence``. Then:

    * If the calibrated confidences differ by more than
      ``CONFIDENCE_DELTA_THRESHOLD``, "CONFLICT" is printed and the detection of
      the model with the larger ``MODEL_WEIGHTS`` entry for its own predicted
      class is returned as-is (RT-DETR wins ties).
    * Otherwise every class in ``class_names`` gets a weighted average of the
      two models' scores (a model scores 0 for classes it did not predict) and
      the highest-scoring class wins; the first class wins ties.

    Args:
        yolo_det: Dict with ``'label'``, ``'confidence'`` and ``'box'`` from YOLO.
        detr_det: Dict with the same keys from RT-DETR.
        class_names: Candidate class labels for the weighted vote.

    Returns:
        Tuple ``(label, confidence, box)``. In the vote branch ``box`` is a new
        four-element list copied from the YOLO box.

    Raises:
        KeyError: If ``MODEL_WEIGHTS`` has no entry for a class that is looked up.
        ValueError: If ``class_names`` is empty and the vote branch is reached.
    """

    def weight_of(model_name, class_name):
        # Weights are keyed by lower-case class name. Fail clearly on a missing
        # entry rather than with a TypeError from arithmetic on None.
        weight = MODEL_WEIGHTS[model_name].get(class_name.lower())
        if weight is None:
            raise KeyError(
                f"No weight configured for class {class_name!r} of model {model_name!r}"
            )
        return weight

    yolo_label = yolo_det['label']
    detr_label = detr_det['label']

    yolo_conf_calibrated = calibrate_confidence(yolo_det['confidence'], "YOLOv8x", yolo_label)
    detr_conf_calibrated = calibrate_confidence(detr_det['confidence'], "RT-DETR-101", detr_label)

    # Strong disagreement in confidence: trust whichever model has the higher
    # weight for the class it predicted.
    if abs(yolo_conf_calibrated - detr_conf_calibrated) > CONFIDENCE_DELTA_THRESHOLD:
        print("CONFLICT")
        if weight_of("YOLOv8x", yolo_label) > weight_of("RT-DETR-101", detr_label):
            return yolo_label, yolo_conf_calibrated, yolo_det['box']
        return detr_label, detr_conf_calibrated, detr_det['box']

    # Weighted vote over all candidate classes.
    combined_scores = {}
    for class_name in class_names:
        yolo_weight = weight_of("YOLOv8x", class_name)
        detr_weight = weight_of("RT-DETR-101", class_name)
        total_weight = yolo_weight + detr_weight

        if total_weight > 0:
            yolo_score = yolo_conf_calibrated if yolo_label == class_name else 0
            detr_score = detr_conf_calibrated if detr_label == class_name else 0
            combined_scores[class_name] = (
                yolo_weight * yolo_score + detr_weight * detr_score
            ) / total_weight
        else:
            combined_scores[class_name] = 0

    best_class_name, best_score = max(combined_scores.items(), key=lambda item: item[1])

    # The fused box is always the YOLO box; no coordinate averaging is done,
    # even when YOLO's weight for the winning class is 0.
    # NOTE(review): confirm this is intended rather than a weighted box average.
    fused_box = [yolo_det['box'][i] for i in range(4)]

    return best_class_name, best_score, fused_box



In [None]:
import os


def env_flag(name, default=False):
    """Return True when environment variable ``name`` is "true" (any letter case).

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset; it is stringified and
            compared in the same way as a real value.

    Returns:
        ``True`` if the lower-cased value equals ``"true"``, otherwise ``False``.
    """
    # The value is lower-cased before the comparison, so the literal it is
    # compared with must be lower-case too; a capitalised literal never matches.
    return str(os.getenv(name, default)).lower() == "true"


# Verbosity flag driven by the YOLO_VERBOSE environment variable (off by default).
# NOTE(review): no other visible cell reads VERBOSE - confirm it is still needed.
VERBOSE = env_flag("YOLO_VERBOSE")


In [None]:
# Display the class names stored in the YOLO checkpoint.
# NOTE(review): the evaluation cell below maps YOLO class ids through a
# hard-coded class_names list - presumably this output is what that list was
# checked against; confirm both agree in order.
yolo_model.names

In [None]:
import torch
import supervision as sv
from PIL import Image

# Run both detectors on the test images, fuse their detections, and collect
# ground truth / predictions for the mAP cell that follows.
targets = []
ensemble_preds = []
# Class id -> label lookup applied to BOTH models' class ids below.
# NOTE(review): assumes YOLO, RT-DETR and the dataset share this class order - confirm.
class_names = ['garb-garbage', 'glass', 'metal', 'organic', 'paper', 'plastic']
# NOTE(review): iteration starts at index 45, so test images 0-44 are neither
# shown nor included in the mAP computed from targets/ensemble_preds - confirm intended.
for i in range(45,len(ds_test)):
    print("________________________________________________________________")
    path, source_image, annotations = ds_test[i]
    image = Image.open(path)
    sv.plot_image(source_image, size=(6, 6))
    # RT-DETR inference without gradient tracking.
    inputs = processor(image, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
    # PIL reports (width, height); post-processing is given (height, width).
    w, h = image.size
    results = processor.post_process_object_detection(outputs, target_sizes=[(h, w)], threshold=0.3)
    detr_detections = sv.Detections.from_transformers(results[0])

    # YOLO inference on the same image.
    yolo_results = yolo_model(image)
    yolo_detections = sv.Detections.from_ultralytics(yolo_results[0])


    # Flatten both results into lists of {'box', 'confidence', 'label'} dicts,
    # the shape expected by match_detections / ensemble_predictions.
    detr_dets = [{'box': box, 'confidence': conf,
                  'label': class_names[cls]}
                 for box, conf, cls in zip(detr_detections.xyxy, detr_detections.confidence, detr_detections.class_id)]
    yolo_dets = [{'box': box, 'confidence': conf, 'label': class_names[cls]}
                 for box, conf, cls in zip(yolo_detections.xyxy, yolo_detections.confidence, yolo_detections.class_id)]

    # Pair overlapping detections and fuse each pair into one prediction.
    matched = match_detections(yolo_dets, detr_dets)
    ensemble_boxes, ensemble_confs, ensemble_classes = [], [], []

    for yolo_idx, detr_idx in matched:
        label, conf, box = ensemble_predictions(yolo_dets[yolo_idx], detr_dets[detr_idx], class_names)
        ensemble_boxes.append(box)
        ensemble_confs.append(conf)
        ensemble_classes.append(class_names.index(label))

    matched_yolo = {m[0] for m in matched}
    matched_detr = {m[1] for m in matched}
    # Debug output: raw per-model detections for this image.
    print(yolo_dets)
    print(detr_dets)
    # Detections seen by only one model are kept, with that model's calibrated confidence.
    for idx, det in enumerate(yolo_dets):
        if idx not in matched_yolo:
            ensemble_boxes.append(det['box'])
            ensemble_confs.append(calibrate_confidence(det['confidence'], "YOLOv8x", det['label']))
            ensemble_classes.append(class_names.index(det['label']))

    for idx, det in enumerate(detr_dets):
        if idx not in matched_detr:
            ensemble_boxes.append(det['box'])
            ensemble_confs.append(calibrate_confidence(det['confidence'], "RT-DETR-101", det['label']))
            ensemble_classes.append(class_names.index(det['label']))
    # Debug output: fused boxes and class ids.
    print(ensemble_boxes)
    print(ensemble_classes)
    # Build the fused Detections object; use an empty one when nothing was detected.
    if ensemble_boxes:
        ensemble_detection = sv.Detections(
            xyxy=np.array(ensemble_boxes),
            confidence=np.array(ensemble_confs),
            class_id=np.array(ensemble_classes)
        )
    else:
        ensemble_detection = sv.Detections.empty()

    targets.append(annotations)
    ensemble_preds.append(ensemble_detection)
    # Side-by-side view: ground truth on the left, ensemble prediction on the right.
    # Labels are drawn from ds_train.classes rather than class_names.
    annotated_images = [
        annotate(source_image, annotations, ds_train.classes),
        annotate(source_image, ensemble_detection, ds_train.classes)
    ]

    grid = sv.create_tiles(
              annotated_images,
              titles=['ground truth', 'prediction'],
              titles_scale=0.5,
              single_tile_size=(400, 400),
              tile_padding_color=sv.Color.WHITE,
              tile_margin_color=sv.Color.WHITE
          )

    sv.plot_image(grid, size=(6, 6))



In [None]:
# Score the collected ensemble predictions against the ground-truth annotations.
mean_average_precision = sv.MeanAveragePrecision.from_detections(
    predictions=ensemble_preds, targets=targets
)

# Report the three mAP figures, one per line, rounded to two decimals.
print(
    f"map50_95: {mean_average_precision.map50_95:.2f}",
    f"map50: {mean_average_precision.map50:.2f}",
    f"map75: {mean_average_precision.map75:.2f}",
    sep="\n",
)

In [None]:
# Show RT-DETR-only predictions next to the ground truth for a window of test images.
IMAGE_COUNT = 30   # number of images to visualise
IMAGE_OFFSET = 30  # index of the first test image in the window

# Clamp the window to the dataset size so a test split with fewer than
# IMAGE_OFFSET + IMAGE_COUNT images does not raise IndexError.
last_index = min(IMAGE_OFFSET + IMAGE_COUNT, len(ds_test))

for i in range(IMAGE_OFFSET, last_index):
    path, source_image, annotations = ds_test[i]

    image = Image.open(path)
    inputs = processor(image, return_tensors="pt").to(DEVICE)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing is given (height, width).
    w, h = image.size
    results = processor.post_process_object_detection(
        outputs, target_sizes=[(h, w)], threshold=0.3)

    # Apply NMS with threshold 0.1 to drop overlapping duplicate boxes.
    detections = sv.Detections.from_transformers(results[0]).with_nms(threshold=0.1)

    # Ground truth on the left, prediction on the right.
    annotated_images = [
        annotate(source_image, annotations, ds_train.classes),
        annotate(source_image, detections, ds_train.classes)
    ]
    grid = sv.create_tiles(
        annotated_images,
        titles=['ground truth', 'prediction'],
        titles_scale=0.5,
        single_tile_size=(400, 400),
        tile_padding_color=sv.Color.WHITE,
        tile_margin_color=sv.Color.WHITE
    )
    sv.plot_image(grid, size=(6, 6))