# Load data

In [None]:
import wandb
# Authenticate with Weights & Biases before a run is opened.
wandb.login()

# Open the run used for experiment tracking. The `config` values are stored as
# run metadata only; nothing visible in this notebook reads them back, so keep
# them in sync by hand with the values actually used for training.
wandb.init(
    project="AlfaFood Faster-RCNN",
    config={
    "learning_rate": 1e-4,
    "architecture": "Faster-RCNN Backbone ResNet50",
    "dataset": "AlfaFood",
    "epochs": 10,
    }
)

In [None]:
from pathlib import Path
import os

data_path = Path("/kaggle/input/alfafood")

def walk_through_dir(dir_path):
    """Print one summary line per directory found under dir_path.

    Each line reports how many sub-directories and how many files (reported
    as "images") the directory contains. Nothing is returned.
    """
    walker = os.walk(dir_path)
    for entry in walker:
        dirpath, dirnames, filenames = entry
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

walk_through_dir(data_path)

In [None]:
import random
from PIL import Image

# Collect every .jpg exactly one directory level below the dataset root.
image_path_list = list(data_path.glob("*/*.jpg"))

# Collect the .txt annotation files from the same directory level.
labels_path_list = list(data_path.glob("*/*.txt"))

print(image_path_list[:1])

# Pick one image at random for a visual sanity check (not seeded, so it
# changes between runs).
random_image_path = random.choice(image_path_list)
print(random_image_path)

img = Image.open(random_image_path)

# PIL's `size` is (width, height). This constant is not referenced again in
# the visible notebook; a separate ORIGINAL_SHAPE is hard-coded further down.
ORIGINAL_IMAGE_SHAPE = (img.size)

# Print metadata
print(f"Random image path: {random_image_path}")
print(f"Image height: {img.height}")
print(f"Image width: {img.width}")
img

In [None]:
# Sort both lists in place so that index i of the image list and index i of
# the label list are later zipped together as one sample.
# NOTE(review): this pairing assumes every image has exactly one label file
# whose name sorts into the same position -- confirm against the dataset.
image_path_list.sort()

labels_path_list.sort()

In [None]:
# Exploratory look at a single annotation file before writing the real parser.
file_path = "/kaggle/input/alfafood/objects/objects_0.txt"

with open(file_path, 'r') as file:
    file_content = file.read()

print("Содержимое файла:")
print(file_content)

lines = file_content.splitlines()
# NOTE: the name `data` is rebound to an AlfaFoodDataset in a later cell.
data = []

for line in lines:
    parts = line.split(' ')
    # Only lines with exactly six space-separated fields are kept; the sixth
    # field (parts[5]) is never read. Other lines are silently skipped here.
    if len(parts) == 6:
        label = parts[0]
        # NOTE(review): the names suggest centre/size boxes, but the transforms
        # later declare the 'pascal_voc' format -- confirm the file's layout.
        x_center = float(parts[1])
        y_center = float(parts[2])
        width = float(parts[3])
        height = float(parts[4])
        data.append((label, x_center, y_center, width, height))

print("Обработанные данные:")
for item in data:
    print(item)


In [None]:
from typing import Dict, List, Tuple

def create_objects_from_txt(path: Path) -> Dict[str, List]:
    """Parse one annotation file into box and label lists.

    Every non-blank line must hold exactly six fields separated by single
    spaces: an integer label, four box coordinates, and a sixth field that
    is ignored.

    Args:
        path: Location of the annotation text file.

    Returns:
        A dict with two parallel lists: ``"bboxes"`` (one list of four floats
        per object, in file order) and ``"labels"`` (one int per object).

    Raises:
        ValueError: If a non-blank line does not have six fields, or a field
            cannot be converted to a number. The message names the file and
            the 1-based line number.
    """
    with open(path, 'r') as file:
        lines = file.read().splitlines()

    data = {"bboxes": [], "labels": []}

    for line_number, line in enumerate(lines, start=1):
        # A blank line carries no annotation; skipping it keeps an otherwise
        # valid file from being rejected.
        if not line.strip():
            continue

        # Split on single spaces (not arbitrary whitespace) so that a line
        # ending with a trailing space still counts six fields as before.
        parts = line.split(' ')
        if len(parts) != 6:
            raise ValueError(
                f"Invalid data format in '{path}', line {line_number}: "
                f"expected 6 space-separated fields, got {len(parts)}"
            )

        try:
            label = int(parts[0])
            box = [float(value) for value in parts[1:5]]
        except ValueError as error:
            raise ValueError(
                f"Invalid data format in '{path}', line {line_number}: {error}"
            ) from error

        data["bboxes"].append(box)
        data["labels"].append(label)
    return data

create_objects_from_txt(file_path)

In [None]:
# Parse every annotation file, in the (sorted) order of labels_path_list, so
# objects[i] is later paired with image_path_list[i].
objects = [create_objects_from_txt(file_path) for file_path in labels_path_list]

objects[0]

# Preprocessing


In [None]:
import os
import cv2
import random
import pathlib
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from typing import Tuple, Dict, List

import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from sklearn.model_selection import train_test_split

import albumentations
from albumentations.pytorch.transforms import ToTensorV2

from PIL import Image, ImageFile, ImageFont, ImageDraw, ImageEnhance
ImageFile.LOAD_TRUNCATED_IMAGES = True

import copy
import random
from time import time

import warnings
warnings.filterwarnings('ignore')

# Target size for resized images. The Resize transforms in this notebook read
# index 0 as the width and index 1 as the height.
FUSED_SHAPE = (640, 480)
# NOTE(review): not referenced by any code visible in this notebook;
# presumably (width, height) of the raw photos -- confirm or remove.
ORIGINAL_SHAPE = (4000, 3000)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Prepare Dataset

In [None]:
class AlfaFoodDataset(Dataset):
    """Detection dataset that pairs image files with box/label annotations.

    Sample ``i`` is built from ``images[i]`` and ``objects[i]``; the two
    sequences are expected to be aligned by the caller.
    """

    def __init__(self, images: List, objects: List[Dict[str, List]], transform: torchvision.transforms=None) -> None:
        """Store the image paths, a private copy of the annotations and the transform.

        Args:
            images: Image locations accepted by ``PIL.Image.open``.
            objects: One dict per image with ``'bboxes'`` and ``'labels'`` lists.
            transform: Optional callable invoked as
                ``transform(image=..., bboxes=..., labels=...)`` that returns a
                mapping with the same three keys.
        """
        self.images = images
        # Deep copy so later changes to the caller's dicts do not leak in.
        self.annotations = copy.deepcopy(objects)
        self.transform = transform

        # Count of distinct label ids found anywhere in the annotations.
        distinct_labels = {label for entry in objects for label in entry['labels']}
        self.num_classes = len(distinct_labels)

        # One flag per sample, set to 1 once that sample was read with a transform.
        self.list_transforms = np.zeros(shape=(len(self.images),))

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, Tuple[Tuple[int]], Tuple[int]]:
        """Load one sample.

        Returns:
            ``(image, target)`` where ``image`` is a NumPy array and ``target``
            is a dict with a float ``'boxes'`` tensor and an int64 ``'labels'``
            tensor. Without a transform the image is channel-first uint8.
        """
        image = np.array(Image.open(self.images[index]).convert('RGB'))
        entry = self.annotations[index]
        bboxes, labels = entry['bboxes'], entry['labels']

        if self.transform:
            augmented = self.transform(image = image, bboxes = bboxes, labels = labels)
            # This transpose and the one after the `if` cancel each other out,
            # so the transformed image keeps whatever axis order the transform
            # produced.
            image = np.array(augmented['image']).transpose(1, 2, 0)
            bboxes, labels = augmented['bboxes'], augmented['labels']
            self.list_transforms[index] = 1

        # Untransformed images go from height/width/channel to channel-first.
        image = image.transpose(2, 0, 1)

        boxes_tensor = torch.as_tensor(bboxes, dtype=torch.float)
        labels_tensor = torch.as_tensor(labels, dtype=torch.int64)

        # A sample without objects gets one tiny placeholder box with label 0,
        # so the target is never empty.
        if boxes_tensor.shape == torch.Size([0]):
            boxes_tensor = torch.Tensor([[0, 0, 1e-10, 1e-10]])
        if labels_tensor.shape == torch.Size([0]):
            labels_tensor = torch.zeros(size=(1, ), dtype=torch.int64)

        return image, {'boxes': boxes_tensor, 'labels': labels_tensor}

# Visualizing bounding boxes with labels

In [None]:
# Untransformed dataset over all samples. This rebinds `data`, which an earlier
# cell used for a plain list of parsed rows.
data = AlfaFoodDataset(image_path_list, objects)

# Quick check that one sample loads; with no transform the image is channel-first.
img, target = data[8]
print(img.shape)

In [None]:
# Palette of 129 random RGB tuples, indexed by label id when boxes are drawn.
# Not seeded, so the colours differ between kernel sessions.
color = list((random.randint(40, 240), random.randint(40, 255), random.randint(60, 255)) for i in range(129))

def objects_threshold_scores(bboxes: torch.Tensor, 
                         labels: torch.Tensor=None, 
                         scores: torch.Tensor=None,
                         threshold_score: float=0.1):
    """Keep only the detections whose score reaches ``threshold_score``.

    Args:
        bboxes: Tensor with one row per detection.
        labels: Optional tensor with one label per detection.
        scores: Optional tensor with one confidence per detection.
        threshold_score: Minimum score (inclusive) for a detection to be kept.

    Returns:
        ``(bboxes, labels, scores)`` restricted to the kept detections, in
        their original order, as CPU tensors. Labels and scores are returned
        as float32, as before. When ``scores`` is None nothing can be
        filtered and the three inputs are returned unchanged; when ``labels``
        is None the returned labels are None. If no detection survives, the
        box tensor has zero rows.
    """
    if scores is None:
        return bboxes, labels, scores

    # Work on detached CPU copies so results never alias the model outputs and
    # inputs from any device are accepted.
    scores_cpu = torch.as_tensor(scores).detach().cpu()
    keep = scores_cpu >= threshold_score

    # One boolean-mask selection replaces the per-row torch.cat loop.
    kept_boxes = torch.as_tensor(bboxes).detach().cpu()[keep]
    kept_scores = scores_cpu[keep].to(torch.float32)

    kept_labels = None
    if labels is not None:
        kept_labels = torch.as_tensor(labels).detach().cpu()[keep].to(torch.float32)

    return kept_boxes, kept_labels, kept_scores

def show_image_with_objects(image: np.array, 
                            bboxes: torch.Tensor, 
                            labels: torch.Tensor=None, 
                            scores: torch.Tensor=None,
                            threshold_score: float=0.5):
    """Draw bounding boxes, and captions when labels are given, onto an image.

    Args:
        image: Channel-first array; it is transposed to channel-last before
            being handed to ``Image.fromarray``.
        bboxes: One row of four corner coordinates per box.
        labels: Optional label id per box. Without labels the boxes are drawn
            uncaptioned in the first palette colour.
        scores: Optional confidence per box. When given, boxes are first
            filtered with ``objects_threshold_scores`` and the caption also
            shows the score as a percentage.
        threshold_score: Score threshold passed to ``objects_threshold_scores``.

    Returns:
        The annotated ``PIL.Image``.
    """
    image = Image.fromarray(image.transpose(1, 2, 0))

    if scores is not None:
        bboxes, labels, scores = objects_threshold_scores(bboxes, labels, scores, threshold_score)

    # One drawing context serves every box.
    draw = ImageDraw.Draw(image)

    for i in range(len(bboxes)):
        x0, y0, x1, y1 = (float(value) for value in bboxes[i])

        label_id = int(labels[i]) if labels is not None else None
        # Wrap around the palette so a label id beyond its length cannot
        # raise IndexError.
        outline = color[label_id % len(color)] if label_id is not None else color[0]

        draw.rectangle((x0, y0, x1, y1), outline = outline, width=2)

        if label_id is None:
            continue

        if scores is not None:
            caption = f"ID{label_id} {scores[i] * 100:.2f}%"
        else:
            caption = f"ID{label_id}"

        # Black backing rectangle, padded by 2 px, keeps the caption readable.
        bbox = draw.textbbox((x0, y0), caption)
        draw.rectangle((bbox[0]-2, bbox[1]-2, bbox[2]+2, bbox[3]+2), fill=(0, 0, 0))
        draw.text((x0, y0), caption, outline)
    return image

# Draw the ground-truth boxes of one randomly chosen, untransformed sample.
img, target = random.choice(data)
show_image_with_objects(img, target['boxes'], target['labels'])

# Prepare the transformation for augmentations

In [None]:
# Training pipeline: resize to FUSED_SHAPE (index 0 = width, index 1 = height),
# apply light random augmentation, then convert to a tensor. Boxes are
# declared to be in the 'pascal_voc' format and are transformed together with
# the `labels` field.
train_transform = albumentations.Compose(
    [
        albumentations.Resize(height=FUSED_SHAPE[1], width=FUSED_SHAPE[0]),
        albumentations.HorizontalFlip(p=0.5),
#         albumentations.Rotate(p=0.5),
        albumentations.Blur(p=0.1),
        albumentations.CLAHE(p=0.1),
#         albumentations.ToGray(p=0.1),
        albumentations.MedianBlur(p=0.1),
        albumentations.pytorch.transforms.ToTensorV2()
    ],
    bbox_params=albumentations.BboxParams(format='pascal_voc', label_fields=['labels'])
)

# Evaluation pipeline: the same resize and tensor conversion, with no random
# augmentation.
test_transform = albumentations.Compose(
    [
        albumentations.Resize(height=FUSED_SHAPE[1], width=FUSED_SHAPE[0]),
        albumentations.pytorch.transforms.ToTensorV2()
    ],
    bbox_params=albumentations.BboxParams(format='pascal_voc', label_fields=['labels'])
)

# Separation of data into training, validation and test samples

In [None]:
# Split the (image, annotation) pairs 70 % / 30 %, then divide the 30 %
# hold-out into validation and test parts. The second split's outputs are both
# used, so validation and test samples are disjoint (previously the validation
# set was a subset of the test set).
# NOTE(review): test_size=0.1 leaves only a tenth of the hold-out for testing
# -- confirm that this ratio is intended.
train_pairs, holdout_pairs = train_test_split(list(zip(image_path_list, objects)), test_size=0.3, shuffle=True, random_state=42)
val_pairs, test_pairs = train_test_split(holdout_pairs, test_size=0.1, shuffle=True, random_state=42)

# Only the training set gets random augmentation.
train_data = AlfaFoodDataset(images=list(items[0] for items in train_pairs),
                             objects=list(items[1] for items in train_pairs),
                             transform=train_transform)

test_data = AlfaFoodDataset(images=list(items[0] for items in test_pairs),
                            objects=list(items[1] for items in test_pairs),
                            transform=test_transform )

val_data = AlfaFoodDataset(images=list(items[0] for items in val_pairs),
                           objects=list(items[1] for items in val_pairs),
                           transform=test_transform )

In [None]:
# Visual check that boxes still line up with the image after the training
# transform (resize plus random augmentation) has been applied.
img, target = train_data[5]
print(img.shape)
show_image_with_objects(img, target['boxes'], target['labels'])

# Create Dataloaders

In [None]:
def collate_fn(batch):
    """Regroup a list of samples into one tuple per field, without stacking.

    A batch of ``(image, target)`` pairs becomes ``(images, targets)``, each a
    tuple with one entry per sample.
    """
    columns = zip(*batch)
    return tuple(columns)

# Training loader: shuffled batches of 8; collate_fn keeps images and targets
# as tuples instead of stacking them.
train_dataloader = torch.utils.data.DataLoader(dataset=train_data,
                                               batch_size=8,
                                               shuffle=True,
                                               num_workers=os.cpu_count(),
                                               collate_fn=collate_fn)
# Validation loader: fixed order, batches of 2.
val_dataloader = torch.utils.data.DataLoader(dataset=val_data,
                                             batch_size=2,
                                             shuffle=False,
                                             num_workers=os.cpu_count(),
                                             collate_fn=collate_fn)
# Test loader: fixed order, batches of 2.
test_dataloader = torch.utils.data.DataLoader(dataset=test_data,
                                              batch_size=2,
                                              shuffle=False,
                                              num_workers=os.cpu_count(),
                                              collate_fn=collate_fn)

# Create model

In [None]:
class FasterRCNN_ResNet50(torch.nn.Module):
    """Wrapper around torchvision's Faster R-CNN (ResNet-50 FPN) with a new box head."""

    def __init__(self, num_classes: int=127) -> None:
        """Build the pretrained detector and replace its box predictor.

        Args:
            num_classes: Number of object classes; the new predictor is
                created with ``num_classes + 2`` outputs.
        """
        super().__init__()

        detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True, pretrained_backbone=True)

        # Swap the classification/regression head for one sized to this dataset.
        head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
        detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes + 2)
        self.model = detector

        # Mark every parameter of every sub-module as trainable.
        for submodule in list(self.model.children()):
            for weight in submodule.parameters():
                weight.requires_grad = True

    def predict(self, X: torch.Tensor) -> torch.Tensor:
        """Call the wrapped detector on images only and return its output."""
        return self.model(X)

    def forward(self, images: List[torch.Tensor], annotation: List[Dict[str, torch.Tensor]]) -> Dict[str, int]:
        """Call the wrapped detector with images and targets (used to obtain the losses)."""
        return self.model(images, annotation)

model = FasterRCNN_ResNet50(num_classes=data.num_classes)

# Function for calculating the IoU metric

In [None]:
def get_IoU(bbox1: torch.Tensor, bbox2: torch.Tensor) -> float:
    """Intersection over Union of two boxes given as (x_min, y_min, x_max, y_max).

    Args:
        bbox1: First box; any indexable of four numbers or 0-d tensors.
        bbox2: Second box, same layout.

    Returns:
        The IoU in [0, 1]. Boxes that do not overlap (or merely touch) give
        0.0, as does a pair whose union has no area.
    """
    x_left = max(bbox1[0], bbox2[0])
    x_right = min(bbox1[2], bbox2[2])

    y_low = max(bbox1[1], bbox2[1])
    y_high = min(bbox1[3], bbox2[3])

    overlap_width = x_right - x_left
    overlap_height = y_high - y_low

    # Disjoint boxes yield a negative extent on at least one axis. Without this
    # guard two negative extents would multiply into a positive "intersection".
    if overlap_width <= 0 or overlap_height <= 0:
        return 0.0

    intersection_bboxes = overlap_width * overlap_height
    union_bboxes = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) + \
                 (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) - intersection_bboxes

    # Malformed boxes could still produce a non-positive union; avoid dividing by it.
    if union_bboxes <= 0:
        return 0.0

    return intersection_bboxes / union_bboxes

assert(get_IoU(target['boxes'][0], target['boxes'][0]) == 1)

## Metrics

In [None]:
import torch
from torchmetrics import Metric
import numpy as np

class MAP50(Metric):
    """Metric that accumulates boxes and reports mAP at an IoU threshold of 0.5.

    Each entry passed to ``update`` becomes one group for ``calculate_map``.
    """

    def __init__(self, num_classes, dist_sync_on_step=False):
        """Register the two list states that collect predictions and ground truth."""
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.num_classes = num_classes
        for state_name in ("pred_boxes", "true_boxes"):
            self.add_state(state_name, default=[], dist_reduce_fx="cat")

    def update(self, preds, targets):
        """Append predictions and targets to the accumulated state.

        Args:
            preds: List of dicts with ``'boxes'`` and ``'scores'`` tensors.
            targets: List of dicts with a ``'boxes'`` tensor.

        Raises:
            ValueError: If either argument is not a list of dicts.
        """
        if not isinstance(preds, list) or not isinstance(targets, list):
            raise ValueError("Preds and targets must be lists.")

        has_non_dict = (
            any(not isinstance(entry, dict) for entry in preds)
            or any(not isinstance(entry, dict) for entry in targets)
        )
        if has_non_dict:
            raise ValueError("Preds and targets must be lists of dictionaries.")

        for pred, target in zip(preds, targets):
            # Convert everything first so a bad entry leaves the state untouched.
            detections = (pred['boxes'].tolist(), pred['scores'].tolist())
            ground_truth = target['boxes'].tolist()

            self.pred_boxes.append(detections)
            self.true_boxes.append(ground_truth)

    def compute(self):
        """Return the mAP at IoU 0.5 over everything accumulated so far."""
        return calculate_map(self.pred_boxes, self.true_boxes, iou_threshold=0.5)

class MAP50_95(Metric):
    """Metric that accumulates boxes and reports mAP averaged over IoU 0.5 to 0.95.

    Each entry passed to ``update`` becomes one group for ``calculate_map50_95``.
    """

    def __init__(self, num_classes, dist_sync_on_step=False):
        """Register the two list states that collect predictions and ground truth."""
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.num_classes = num_classes
        for state_name in ("pred_boxes", "true_boxes"):
            self.add_state(state_name, default=[], dist_reduce_fx="cat")

    def update(self, preds, targets):
        """Append predictions and targets to the accumulated state.

        Args:
            preds: List of dicts with ``'boxes'`` and ``'scores'`` tensors.
            targets: List of dicts with a ``'boxes'`` tensor.

        Raises:
            ValueError: If either argument is not a list of dicts.
        """
        if not isinstance(preds, list) or not isinstance(targets, list):
            raise ValueError("Preds and targets must be lists.")

        has_non_dict = (
            any(not isinstance(entry, dict) for entry in preds)
            or any(not isinstance(entry, dict) for entry in targets)
        )
        if has_non_dict:
            raise ValueError("Preds and targets must be lists of dictionaries.")

        for pred, target in zip(preds, targets):
            # Convert everything first so a bad entry leaves the state untouched.
            detections = (pred['boxes'].tolist(), pred['scores'].tolist())
            ground_truth = target['boxes'].tolist()

            self.pred_boxes.append(detections)
            self.true_boxes.append(ground_truth)

    def compute(self):
        """Return the mAP50-95 over everything accumulated so far."""
        return calculate_map50_95(self.pred_boxes, self.true_boxes)

# Helper functions
def calculate_iou(box1, box2):
    """Calculate Intersection over Union (IoU) of two bounding boxes.

    Args:
        box1: Four numbers ``(x1, y1, x2, y2)``.
        box2: Four numbers in the same layout.

    Returns:
        The IoU. Non-overlapping boxes give 0, and a pair whose union has no
        area (for example two zero-size boxes) gives 0.0 instead of raising
        ZeroDivisionError.
    """
    x1, y1, x2, y2 = box1
    x1_gt, y1_gt, x2_gt, y2_gt = box2

    # Corners of the overlap rectangle; extents are clamped at zero so
    # disjoint boxes contribute no area.
    xi1 = max(x1, x1_gt)
    yi1 = max(y1, y1_gt)
    xi2 = min(x2, x2_gt)
    yi2 = min(y2, y2_gt)
    inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)

    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x2_gt - x1_gt) * (y2_gt - y1_gt)

    union_area = box1_area + box2_area - inter_area

    # Degenerate boxes have no area to compare.
    if union_area <= 0:
        return 0.0

    return inter_area / union_area

def average_precision(recalls, precisions):
    """Area under the precision envelope of a precision/recall curve.

    The curve is padded with a point at recall 0 and one at recall 1,
    precision is replaced by its running maximum taken from the right, and
    the area is summed over the positions where recall changes.
    """
    padded_recall = np.concatenate(([0.0], recalls, [1.0]))
    padded_precision = np.concatenate(([0.0], precisions, [0.0]))

    # Right-to-left running maximum: each point takes the best precision
    # reachable at an equal or later position.
    envelope = np.maximum.accumulate(padded_precision[::-1])[::-1]

    # Positions where recall changes; only those contribute area.
    steps = np.flatnonzero(padded_recall[1:] != padded_recall[:-1])
    widths = padded_recall[steps + 1] - padded_recall[steps]
    return np.sum(widths * envelope[steps + 1])

def calculate_map(pred_boxes, true_boxes, iou_threshold=0.5):
    """Calculate mean Average Precision (mAP) for a specific IoU threshold.

    Args:
        pred_boxes: One ``(boxes, scores)`` pair per group (class); ``boxes``
            is a list of ``[x1, y1, x2, y2]`` and ``scores`` the matching
            confidences.
        true_boxes: One list of ground-truth boxes per group, aligned with
            ``pred_boxes`` by index.
        iou_threshold: Minimum IoU for a prediction to match a ground-truth box.

    Returns:
        The mean of the per-group average precisions. Groups with neither
        ground truth nor predictions are left out of the mean, because they
        carry no information and would only dilute it. Groups with ground
        truth but no predictions, or the reverse, score 0. If no group
        qualifies the result is 0.0 rather than NaN.
    """
    average_precisions = []

    for c in range(len(pred_boxes)):
        true_class_boxes = true_boxes[c]
        pred_class_boxes, pred_scores = pred_boxes[c]

        n_true, n_pred = len(true_class_boxes), len(pred_class_boxes)

        # Group absent from both sides: AP is undefined, so skip it.
        if n_true == 0 and n_pred == 0:
            continue

        # Only misses or only false positives: nothing can be matched.
        if n_true == 0 or n_pred == 0:
            average_precisions.append(0)
            continue

        # Visit predictions from the most to the least confident.
        true_class_boxes_detected = np.zeros(n_true)
        pred_indices = np.argsort(-np.array(pred_scores))
        pred_class_boxes = np.array(pred_class_boxes)[pred_indices].tolist()

        tp = np.zeros(n_pred)
        fp = np.zeros(n_pred)

        for pred_idx, pred_box in enumerate(pred_class_boxes):
            max_iou = 0
            max_iou_idx = -1

            for true_idx, true_box in enumerate(true_class_boxes):
                iou = calculate_iou(pred_box, true_box)

                if iou > max_iou:
                    max_iou = iou
                    max_iou_idx = true_idx

            # A ground-truth box can be claimed once; later predictions that
            # hit the same box count as false positives.
            if max_iou >= iou_threshold and true_class_boxes_detected[max_iou_idx] == 0:
                tp[pred_idx] = 1
                true_class_boxes_detected[max_iou_idx] = 1
            else:
                fp[pred_idx] = 1

        tp_cumsum = np.cumsum(tp)
        fp_cumsum = np.cumsum(fp)

        recalls = tp_cumsum / n_true
        # Every prediction is either a TP or an FP, so the denominator is >= 1.
        precisions = tp_cumsum / (tp_cumsum + fp_cumsum)

        average_precisions.append(average_precision(recalls, precisions))

    if not average_precisions:
        return 0.0

    return np.mean(average_precisions)

def calculate_map50_95(pred_boxes, true_boxes):
    """Average ``calculate_map`` over the IoU thresholds 0.5, 0.55, ... (step 0.05, below 1.0)."""
    per_threshold = [
        calculate_map(pred_boxes, true_boxes, threshold)
        for threshold in np.arange(0.5, 1.0, 0.05)
    ]
    return np.mean(per_threshold)

# Usage example: one predicted box and one ground-truth box, both of class 0,
# with the same size and a horizontal offset of 44 px.
preds = [
    dict(
        boxes=torch.tensor([[258.0, 41.0, 606.0, 285.0]]),
        scores=torch.tensor([0.536]),
        labels=torch.tensor([0]),
    )
]
targets = [
    dict(
        boxes=torch.tensor([[214.0, 41.0, 562.0, 285.0]]),
        labels=torch.tensor([0]),
    )
]

# Convert per-image predictions/targets into the per-class layout used by the metrics.
def prepare_data(preds, targets):
    """Regroup per-image detections and ground truth into one entry per class id.

    Args:
        preds: List of dicts (one per image) with ``'boxes'``, ``'scores'`` and
            ``'labels'`` tensors holding one row/entry per detection.
        targets: List of dicts (one per image) with ``'boxes'`` and ``'labels'``.

    Returns:
        ``(class_preds, class_targets)``: two lists indexed by class id, from 0
        up to the largest label seen in either input. Each element is a dict
        of tensors holding only the boxes (and scores) that belong to that
        class. Both lists are empty when no label occurs at all.
    """
    boxes_by_class = {}
    scores_by_class = {}
    truths_by_class = {}

    for pred in preds:
        # Walk the three tensors in lockstep so each box and score lands under
        # its own label only.
        detections = zip(pred['labels'].tolist(), pred['boxes'].tolist(), pred['scores'].tolist())
        for label, box, score in detections:
            boxes_by_class.setdefault(label, []).append(box)
            scores_by_class.setdefault(label, []).append(score)

    for target in targets:
        for label, box in zip(target['labels'].tolist(), target['boxes'].tolist()):
            truths_by_class.setdefault(label, []).append(box)

    # Size the output from predictions AND targets, so a ground-truth label the
    # model never predicted still gets its own entry.
    seen_labels = set(boxes_by_class) | set(truths_by_class)
    num_classes = max(seen_labels) + 1 if seen_labels else 0

    class_preds = []
    class_targets = []
    for class_id in range(num_classes):
        class_boxes = boxes_by_class.get(class_id, [])
        class_scores = scores_by_class.get(class_id, [])
        class_truths = truths_by_class.get(class_id, [])

        class_preds.append(dict(boxes=torch.tensor(class_boxes),
                                scores=torch.tensor(class_scores),
                                labels=torch.tensor([class_id] * len(class_boxes))))
        class_targets.append(dict(boxes=torch.tensor(class_truths),
                                  labels=torch.tensor([class_id] * len(class_truths))))

    return class_preds, class_targets

# Regroup the sample predictions/targets by class id.
pred_boxes, true_boxes = prepare_data(preds, targets)

# Inspect the data layout
print("Pred Boxes:", pred_boxes)
print("True Boxes:", true_boxes)

# Compute mAP50 and mAP50-95
map50_metric = MAP50(num_classes=len(pred_boxes))
map50_metric.update(pred_boxes, true_boxes)
map50 = map50_metric.compute()

map50_95_metric = MAP50_95(num_classes=len(pred_boxes))
map50_95_metric.update(pred_boxes, true_boxes)
map50_95 = map50_95_metric.compute()

print(f"mAP50: {map50}")
print(f"mAP50-95: {map50_95}")


# Declare functions for training, validation and testing

In [None]:
targets

In [None]:
def train_step(device: torch.device,
               model: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               dataloader: torch.utils.data.DataLoader,
               criterion: torch.nn.Module = None,
               scheduler: torch.optim.lr_scheduler = None):
    """Train the model for one pass over ``dataloader``.

    Args:
        device: Device the model and each batch are moved to.
        model: Module called as ``model(images, annotations)`` that returns a
            dict of loss tensors.
        optimizer: Optimizer stepped once per batch.
        dataloader: Iterable of ``(images, annotations)`` batches with a length.
        criterion: Must be None; the loss comes from the model itself.
        scheduler: Optional LR scheduler, stepped once per batch when given.

    Returns:
        ``(mean_loss, seconds)``: the batch-averaged total loss and the
        wall-clock duration of the pass.

    Raises:
        NotImplementedError: If ``criterion`` is given. Previously this path
            fell through and failed later on an undefined ``loss``.
    """
    if criterion is not None:
        raise NotImplementedError(
            "train_step only supports models that return their own loss dict; "
            "an external criterion is not implemented."
        )

    model.to(device)
    model.train()

    train_loss = 0
    torch.cuda.empty_cache()

    time_epoch_start = time()

    for images, annotations in dataloader:
        images = [torch.Tensor(image).to(device) for image in images]
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]

        # The total loss is the sum of every component the model reports.
        loss_dict = model(images, annotations)
        loss = sum(loss_dict.values())
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

    train_loss = train_loss / len(dataloader)
    time_epoch_end = time() - time_epoch_start

    return train_loss, time_epoch_end



# Best validation loss seen so far. It lives outside the function so that it
# survives across epochs; re-running this cell resets it.
_BEST_VAL_LOSS = {"value": float("inf")}


@torch.no_grad()
def validation_step(device: torch.device,
                    model: torch.nn.Module,
                    dataloader: torch.utils.data.DataLoader,
                    criterion: torch.nn.Module = None):
    """Compute the mean loss over ``dataloader`` and checkpoint the model.

    The model is put in training mode for the forward passes, because it is
    called with targets to obtain a loss dict; gradients stay disabled and the
    model is returned to eval mode afterwards.

    Args:
        device: Device the model and each batch are moved to.
        model: Module called as ``model(images, annotations)`` that returns a
            dict of loss tensors.
        dataloader: Iterable of ``(images, annotations)`` batches with a length.
        criterion: Must be None; the loss comes from the model itself.

    Returns:
        ``(mean_loss, seconds)``: the batch-averaged total loss and the
        wall-clock duration.

    Side effects:
        Always writes ``latest_model.pth``. Writes ``best_model.pth`` and
        prints a notice only when the loss beats the best loss of all earlier
        calls. Previously the threshold was reset on every call, so every
        epoch was saved as "best".

    Raises:
        NotImplementedError: If ``criterion`` is given.
    """
    if criterion is not None:
        raise NotImplementedError(
            "validation_step only supports models that return their own loss dict; "
            "an external criterion is not implemented."
        )

    model.to(device)

    val_loss = 0
    torch.cuda.empty_cache()

    time_epoch_start = time()

    # The loss dict is only produced in training mode; switch once for the
    # whole pass and always restore eval mode.
    model.train()
    try:
        with torch.inference_mode():
            for images, annotations in dataloader:
                images = [torch.Tensor(image).to(device) for image in images]
                annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]

                loss_dict = model(images, annotations)
                val_loss += sum(loss_dict.values()).item()
    finally:
        model.eval()

    val_loss /= len(dataloader)
    time_epoch_end = time() - time_epoch_start

    if val_loss < _BEST_VAL_LOSS["value"]:
        print('NEW BEST MODEL!')
        torch.save(model.state_dict(), 'best_model.pth')
        _BEST_VAL_LOSS["value"] = val_loss
    torch.save(model.state_dict(), 'latest_model.pth')

    return val_loss, time_epoch_end



def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          val_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          epochs: int,
          device: torch.device):
    """Run the train/validate loop and log the per-epoch losses.

    Args:
        model: Detector passed on to ``train_step`` and ``validation_step``.
        train_dataloader: Batches used for optimisation.
        val_dataloader: Batches used for the per-epoch validation loss.
        optimizer: Optimizer used by ``train_step``.
        epochs: Number of passes over ``train_dataloader``.
        device: Device to train on.

    Returns:
        A dict with the per-epoch history under ``'train_loss'`` and
        ``'val_loss'`` (``'val_mAP'`` is reserved and stays empty).
    """
    log_output = {
                  'train_loss': [],
                  'val_loss': [],
                  'val_mAP': []
                 }

    for epoch in tqdm(range(epochs)):
        train_loss, train_time_epoch = train_step(model=model,
                                                  dataloader=train_dataloader,
                                                  optimizer=optimizer,
                                                  device=device)

        print(f"EPOCH: {epoch+1} | TRAIN LOSS: {train_loss} | TRAIN TIME: {train_time_epoch}")

        # Validate on the loader handed in by the caller, not on the global
        # test loader, so the test set stays untouched during training.
        val_loss, val_time_epoch = validation_step(model=model,
                                                   dataloader=val_dataloader,
                                                   device=device)
        log_output['train_loss'].append(train_loss)
        log_output['val_loss'].append(val_loss)

        print(f"EPOCH: {epoch+1} | VAL LOSS: {val_loss} | VAL TIME: {val_time_epoch}\n")

        # Log this epoch's scalars only; the whole history lists were sent on
        # every epoch before.
        wandb.log({'epoch': epoch + 1, 'train_loss': train_loss, 'val_loss': val_loss})

    return log_output

In [None]:
# LOSSES: {'loss_classifier': tensor(141.1330, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(2.4547, grad_fn=<DivBackward0>), 'loss_objectness': tensor(133.6202), 'loss_rpn_box_reg': tensor(8.3771)}

In [None]:
# num_epochs = 10
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 
# params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.AdamW(params, lr=1e-4, weight_decay=1e-7)

# train(model, train_dataloader, val_dataloader, optimizer, num_epochs, device)

In [None]:
import pathlib

# Checkpoint from an earlier training run, attached as a Kaggle input.
path_to_weights_model = pathlib.Path("/kaggle/input/faster-rcnn/pytorch/default/1/best_model (5).pth")

# Load onto the CPU first; the model is moved to `device` in the inference cell.
# The checkpoint must match the head size the model was built with above.
model.load_state_dict(torch.load(path_to_weights_model, map_location=torch.device('cpu')))

In [None]:
# from torchmetrics.detection import MeanAveragePrecision

# metric = MeanAveragePrecision()
# metric.update(outputs, target)

# from pprint import pprint
# pprint(metric.compute())

In [None]:
# Pick one random test sample (already resized by test_transform).
image, target = random.choice(test_data)
print(image.shape)
# images = [torch.Tensor(image.transpose(2, 0, 1)).to(device) for image in images]
# annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]

# Run the detector on a batch of one image, with gradients disabled.
with torch.no_grad():
    model.eval()
    model.to(device)
    outputs = model.predict(torch.Tensor(image).unsqueeze(dim=0).to(device))

# Keep detections scoring at least 0.3 for display. Only the boxes are moved
# to the CPU here; labels and scores are passed as returned by the model.
bboxes, labels, scores = objects_threshold_scores(outputs[0]['boxes'].to('cpu'), outputs[0]['labels'], outputs[0]['scores'], 0.3)
print(len(labels), len(bboxes))

# The metric below is computed from the unfiltered model output, not from the
# thresholded detections that are drawn.
pred_boxes, true_boxes = prepare_data(outputs, [target])

# Inspect the data layout
# print("Pred Boxes:", pred_boxes)
# print("True Boxes:", true_boxes)

# Compute mAP50 for this single image
map50_metric = MAP50(num_classes=len(pred_boxes))
map50_metric.update(pred_boxes, true_boxes)
map50 = map50_metric.compute()
print(f"mAP50: {map50}")

# Side by side: thresholded predictions (left) and ground truth (right).
fig, axes = plt.subplots(1, 2, figsize=(15, 7))
axes[0].imshow(show_image_with_objects(image, bboxes, labels, scores, 0.3))

axes[1].imshow(show_image_with_objects(image, target['boxes'], target['labels']))

axes[0].set_title("Prediction")
axes[1].set_title("Ground Truth")

axes[0].axis('off')
axes[1].axis('off')

In [None]:
wandb.finish()