In [3]:
from transformers import RTDetrImageProcessor, RTDetrForObjectDetection
from PIL import Image
import torch
import os
import json
from tqdm import tqdm

# Paths
checkpoint_path = r"C:\Users\odara\Downloads\data\rtdetr_fine_tuning\checkpoint-210000"
val_images_dir = r"C:\Users\odara\Downloads\data\val\images"
output_predictions_file = r"C:\Users\odara\Downloads\data\rtdetr_labels\predictions_val.json"

# Use the GPU when present, otherwise fall back to CPU instead of failing on .to("cuda").
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the fine-tuned model and the image processor (inputs are resized to 512x512).
model = RTDetrForObjectDetection.from_pretrained(checkpoint_path)
processor = RTDetrImageProcessor.from_pretrained(
    "PekingU/rtdetr_r101vd_coco_o365",
    size={"height": 512, "width": 512}
)

model.eval()
model.to(device)

# Predictions, accumulated as a flat list of COCO-style detection records.
results = []
image_filenames = sorted([
    f for f in os.listdir(val_images_dir) if f.lower().endswith((".jpg", ".png"))
])

# image_id <-> file name mapping: the id is the position in the sorted file list.
# NOTE(review): assumes the ground-truth annotation file numbers its images the same way — confirm.
image_id_map = {i: fname for i, fname in enumerate(image_filenames)}
filename_to_image_id = {v: k for k, v in image_id_map.items()}

for img_file in tqdm(image_filenames):
    image_path = os.path.join(val_images_dir, img_file)
    # The context manager closes the underlying file; convert() returns an
    # independent, fully loaded image, so it stays usable after the handle is closed.
    with Image.open(image_path) as opened_image:
        image = opened_image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-processing: rescale boxes back to the original image size.
    target_sizes = torch.tensor([image.size[::-1]])  # (H, W)
    results_post = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.001
    )[0]

    image_id = filename_to_image_id[img_file]

    # Convert (x_min, y_min, x_max, y_max) -> (x, y, width, height) on the tensor,
    # then move everything to Python in one step per image instead of one
    # device synchronisation per scalar.
    boxes_xyxy = results_post["boxes"]
    boxes_xywh = torch.cat(
        [boxes_xyxy[:, :2], boxes_xyxy[:, 2:] - boxes_xyxy[:, :2]], dim=1
    ).tolist()
    scores = results_post["scores"].tolist()
    labels = results_post["labels"].tolist()

    for score, label, (x_min, y_min, width, height) in zip(scores, labels, boxes_xywh):
        results.append({
            "image_id": image_id,
            "category_id": int(label),
            "bbox": [
                round(x_min, 2),
                round(y_min, 2),
                round(width, 2),
                round(height, 2)
            ],
            "score": round(score, 5)
        })

# Save in COCO detection-results format.
with open(output_predictions_file, "w") as f:
    json.dump(results, f)

print(f"Предсказания сохранены в: {output_predictions_file}")

100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [40:36<00:00,  4.10it/s]


Предсказания сохранены в: C:\Users\odara\Downloads\data\rtdetr_labels\predictions_val.json


In [4]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import json

# Locations of the ground-truth annotations and of the saved detections
gt_path = r"C:\Users\odara\Downloads\data\rtdetr_labels\instances_val.json"
pred_path = r"C:\Users\odara\Downloads\data\rtdetr_labels\predictions_val.json"

# Ground-truth annotations
coco_gt = COCO(gt_path)

# Detections: read the JSON text and parse it into a list of records
with open(pred_path, "r") as pred_file:
    predictions = json.loads(pred_file.read())

# Wrap the detections in a COCO results object tied to the ground truth
coco_dt = coco_gt.loadRes(predictions)

# Bounding-box evaluation: per-image matching, accumulation, summary table
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
for stage in (coco_eval.evaluate, coco_eval.accumulate, coco_eval.summarize):
    stage()

loading annotations into memory...
Done (t=0.34s)
creating index...
index created!
Loading and preparing results...
DONE (t=4.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=94.44s).
Accumulating evaluation results...
DONE (t=18.76s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.307
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.537
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.291
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.096
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.365
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.634
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.217
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.410
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe

In [5]:
# Example: average precision for every class.
# Axes of the precision array are [T, R, K, A, M]:
# T = IoU thresholds, R = recall thresholds, K = classes.
precisions = coco_eval.eval['precision']

num_classes = len(coco_gt.getCatIds())
for i in range(num_classes):
    # All IoU thresholds and recall points for class i, area "all", maxDets=100.
    class_precisions = precisions[:, :, i, 0, -1]
    # Keep only entries above -1.
    valid_precisions = class_precisions[class_precisions > -1]
    if len(valid_precisions):
        mean_ap = valid_precisions.mean()
    else:
        mean_ap = float('nan')
    print(f"Class {i}: AP = {mean_ap:.4f}")

Class 0: AP = 0.4386
Class 1: AP = 0.2796
Class 2: AP = 0.1747
Class 3: AP = 0.2609
Class 4: AP = 0.4174
Class 5: AP = 0.4409
Class 6: AP = 0.2201
Class 7: AP = 0.2203


In [1]:
from transformers import RTDetrImageProcessor, RTDetrForObjectDetection
from PIL import Image
import torch
import os
import json
from tqdm import tqdm

# Paths
# This run evaluates the final exported model; an earlier run used the
# "checkpoint-210000" directory next to it.
checkpoint_path = r"C:\Users\odara\Downloads\data\rtdetr_fine_tuning\final_model"
val_images_dir = r"C:\Users\odara\Downloads\data\val\images"
output_predictions_file = r"C:\Users\odara\Downloads\data\rtdetr_labels\predictions_val.json"

# Use the GPU when present, otherwise fall back to CPU instead of failing on .to("cuda").
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the fine-tuned model and the image processor (inputs are resized to 512x512).
model = RTDetrForObjectDetection.from_pretrained(checkpoint_path)
processor = RTDetrImageProcessor.from_pretrained(
    "PekingU/rtdetr_r101vd_coco_o365",
    size={"height": 512, "width": 512}
)

model.eval()
model.to(device)

# Predictions, accumulated as a flat list of COCO-style detection records.
results = []
image_filenames = sorted([
    f for f in os.listdir(val_images_dir) if f.lower().endswith((".jpg", ".png"))
])

# image_id <-> file name mapping: the id is the position in the sorted file list.
# NOTE(review): assumes the ground-truth annotation file numbers its images the same way — confirm.
image_id_map = {i: fname for i, fname in enumerate(image_filenames)}
filename_to_image_id = {v: k for k, v in image_id_map.items()}

for img_file in tqdm(image_filenames):
    image_path = os.path.join(val_images_dir, img_file)
    # The context manager closes the underlying file; convert() returns an
    # independent, fully loaded image, so it stays usable after the handle is closed.
    with Image.open(image_path) as opened_image:
        image = opened_image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-processing: rescale boxes back to the original image size.
    target_sizes = torch.tensor([image.size[::-1]])  # (H, W)
    results_post = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.001
    )[0]

    image_id = filename_to_image_id[img_file]

    # Convert (x_min, y_min, x_max, y_max) -> (x, y, width, height) on the tensor,
    # then move everything to Python in one step per image instead of one
    # device synchronisation per scalar.
    boxes_xyxy = results_post["boxes"]
    boxes_xywh = torch.cat(
        [boxes_xyxy[:, :2], boxes_xyxy[:, 2:] - boxes_xyxy[:, :2]], dim=1
    ).tolist()
    scores = results_post["scores"].tolist()
    labels = results_post["labels"].tolist()

    for score, label, (x_min, y_min, width, height) in zip(scores, labels, boxes_xywh):
        results.append({
            "image_id": image_id,
            "category_id": int(label),
            "bbox": [
                round(x_min, 2),
                round(y_min, 2),
                round(width, 2),
                round(height, 2)
            ],
            "score": round(score, 5)
        })

# Save in COCO detection-results format.
with open(output_predictions_file, "w") as f:
    json.dump(results, f)

print(f"Предсказания сохранены в: {output_predictions_file}")

100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [38:30<00:00,  4.33it/s]


Предсказания сохранены в: C:\Users\odara\Downloads\data\rtdetr_labels\predictions_val.json


In [2]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import json

# Locations of the ground-truth annotations and of the saved detections
gt_path = r"C:\Users\odara\Downloads\data\rtdetr_labels\instances_val.json"
pred_path = r"C:\Users\odara\Downloads\data\rtdetr_labels\predictions_val.json"

# Ground-truth annotations
coco_gt = COCO(gt_path)

# Detections: read the JSON text and parse it into a list of records
with open(pred_path, "r") as pred_file:
    predictions = json.loads(pred_file.read())

# Wrap the detections in a COCO results object tied to the ground truth
coco_dt = coco_gt.loadRes(predictions)

# Bounding-box evaluation: per-image matching, accumulation, summary table
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
for stage in (coco_eval.evaluate, coco_eval.accumulate, coco_eval.summarize):
    stage()

loading annotations into memory...
Done (t=0.35s)
creating index...
index created!
Loading and preparing results...
DONE (t=4.10s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=93.83s).
Accumulating evaluation results...
DONE (t=19.27s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.307
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.537
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.291
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.096
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.365
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.634
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.217
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.410
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe

In [6]:
# Example: average precision for every class, labelled by name.
# Axes of the precision array are [T, R, K, A, M].
precisions = coco_eval.eval['precision']

# Class names in the order used to index the class axis (K) below.
class_names = [
    "car", "traffic sign", "traffic light", "person",
    "truck", "bus", "two_wheeler", "rider",
]

for i, class_name in enumerate(class_names):
    # All IoU thresholds and recall points for class i, area=all, maxDets=100.
    class_precisions = precisions[:, :, i, 0, -1]
    # Keep only entries above -1.
    valid_precisions = class_precisions[class_precisions > -1]
    if len(valid_precisions):
        mean_ap = valid_precisions.mean()
    else:
        mean_ap = float('nan')
    print(f"{class_name:15s}: AP@[IoU=0.50:0.95] = {mean_ap:.4f}")

car            : AP@[IoU=0.50:0.95] = 0.4386
traffic sign   : AP@[IoU=0.50:0.95] = 0.2796
traffic light  : AP@[IoU=0.50:0.95] = 0.1747
person         : AP@[IoU=0.50:0.95] = 0.2609
truck          : AP@[IoU=0.50:0.95] = 0.4174
bus            : AP@[IoU=0.50:0.95] = 0.4409
two_wheeler    : AP@[IoU=0.50:0.95] = 0.2201
rider          : AP@[IoU=0.50:0.95] = 0.2203


без учёта класса rider метрика немного выше: 0.3189 вместо 0.3066 с ним

In [5]:
# Class names in the order used to index the class axis (K) of the precision array.
# NOTE(review): assumed to follow the order of the evaluated category ids — confirm.
class_names = [
    "car",
    "traffic sign",
    "traffic light",
    "person",
    "truck",
    "bus",
    "two_wheeler",
    "rider"
]

precisions = coco_eval.eval['precision']  # [T, R, K, A, M]
iou_thresholds = coco_eval.params.iouThrs  # IoU thresholds used by the evaluation

# A label list that is out of sync with the class axis would either raise an
# IndexError or silently leave classes unreported, so fail early and clearly.
if len(class_names) != precisions.shape[2]:
    raise ValueError(
        f"class_names has {len(class_names)} entries but the evaluation "
        f"covers {precisions.shape[2]} classes"
    )

# Index of the IoU = 0.50 threshold. The thresholds are floats, so match with a
# tolerance instead of relying on exact equality.
iou_50_candidates = [
    idx for idx, thr in enumerate(iou_thresholds) if abs(float(thr) - 0.5) < 1e-6
]
if not iou_50_candidates:
    raise ValueError("IoU threshold 0.50 is not among coco_eval.params.iouThrs")
iou_50_index = iou_50_candidates[0]

num_classes = len(class_names)

for i in range(num_classes):
    # IoU=0.50, all recall points, class i, area=all, last maxDets setting
    # (maxDets=100 with the default parameters; same index as the other per-class cells).
    ap = precisions[iou_50_index, :, i, 0, -1]
    # Keep only entries above -1.
    ap = ap[ap > -1]
    mean_ap = ap.mean() if len(ap) else float('nan')
    print(f"{class_names[i]:<15}: AP@0.50 = {mean_ap:.4f}")

car            : AP@0.50 = 0.7343
traffic sign   : AP@0.50 = 0.5456
traffic light  : AP@0.50 = 0.4909
person         : AP@0.50 = 0.5133
truck          : AP@0.50 = 0.5750
bus            : AP@0.50 = 0.5712
two_wheeler    : AP@0.50 = 0.4467
rider          : AP@0.50 = 0.4220


без класса rider метрика немного выше — 0.5539, вместо 0.5374

In [7]:
import os

# Report the on-disk size of the exported weights file, in units of 1024 * 1024 bytes.
file_path = r"C:\Users\odara\Downloads\data\rtdetr_fine_tuning\final_model\model.safetensors"
size_in_bytes = os.stat(file_path).st_size
size_mb = size_in_bytes / (1024 ** 2)
print(f"Размер модели: {size_mb:.2f} МБ")

Размер модели: 293.09 МБ
