In [10]:
from PIL import Image
import os
import cv2
import numpy as np
import xml.etree.ElementTree as ET
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
import torch.nn.functional as F
import torchvision.ops as ops 
from torchvision.ops import roi_pool
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

In [11]:
data = {
    'filename': [],
    'width': [],
    'height': [],
    'class': [],
    'xmin': [],
    'ymin': [],
    'xmax': [],
    'ymax': []
}

In [12]:
def get_file_image_dimensions(file_path):
    if not os.path.isfile(file_path):
        return None, None
    with Image.open(file_path) as img:
        width, height = img.size
    return width, height

def get_xml_image_dimensions(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    size = root.find('size')
    if size is not None:
        width = size.find('width').text
        height = size.find('height').text
        if width and height:
            return int(width), int(height)
    return 0, 0  


def get_image_dimensions(xml_file, image_file_path):
    width, height = get_xml_image_dimensions(xml_file)
    
    if width == 0 or height == 0:
        width, height = get_file_image_dimensions(image_file_path)
        
    return width, height


def parse_xml(xml_file, image_file_path):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    filename = root.find('filename').text
    
    width, height = get_image_dimensions(xml_file, image_file_path)


    for obj in root.iter('object'):
        obj_class = obj.find('name').text
        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text)
        ymin = int(bbox.find('ymin').text)
        xmax = int(bbox.find('xmax').text)
        ymax = int(bbox.find('ymax').text)

        data['filename'].append(filename)
        data['width'].append(width)
        data['height'].append(height)
        data['class'].append(obj_class)
        data['xmin'].append(xmin)
        data['ymin'].append(ymin)
        data['xmax'].append(xmax)
        data['ymax'].append(ymax)


In [13]:
class FruitDataset(Dataset):
    def __init__(self, data_dir, transforms=None, image_size=(416, 416)):
        self.data_dir = data_dir
        self.transforms = transforms
        self.image_size = image_size 
        
        self.images = [f for f in os.listdir(data_dir) if f.endswith('.jpg')]
        
        for image_file in self.images:
            xml_file = image_file.replace('.jpg', '.xml')
            xml_path = os.path.join(data_dir, xml_file)
            image_path = os.path.join(data_dir, image_file)
            if os.path.exists(xml_path):
                parse_xml(xml_path, image_path)
        
        self.dataframe = pd.DataFrame(data)

    def __len__(self):
        return len(self.images)
    
    def class_to_label(self, class_name):
        class_mapping = {'apple': 1, 'banana': 2, 'orange': 3, 'mixed': 4}
        return class_mapping.get(class_name, 0) 
    
    def __getitem__(self, idx):
        image_name = self.images[idx]
        image_path = os.path.join(self.data_dir, image_name)

    # Завантажуємо зображення
        image = cv2.imread(image_path)
    
    # Перетворюємо в RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        boxes = []
        labels = []
    
        image_data = self.dataframe[self.dataframe['filename'] == image_name]
        for _, row in image_data.iterrows():
            xmin = row['xmin']
            ymin = row['ymin']
            xmax = row['xmax']
            ymax = row['ymax']
            label = self.class_to_label(row['class'])
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(label)
    
        orig_height, orig_width = image.shape[:2]
    
    # Приведення всіх зображень до одного розміру
        image = cv2.resize(image, self.image_size)
    
    # Пропорційне масштабування bounding boxes
        scale_x = self.image_size[0] / orig_width
        scale_y = self.image_size[1] / orig_height
        boxes = [[xmin * scale_x, ymin * scale_y, xmax * scale_x, ymax * scale_y] for xmin, ymin, xmax, ymax in boxes]
    
        boxes = [[xmin / self.image_size[0], ymin / self.image_size[1], xmax / self.image_size[0], ymax / self.image_size[1]] for xmin, ymin, xmax, ymax in boxes]

        if self.transforms:
            transformed = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = transformed['image']
            boxes = torch.as_tensor(transformed['bboxes'], dtype=torch.float32)

        labels = torch.as_tensor(labels, dtype=torch.int64)
    
        target = {"boxes": boxes, "labels": labels}
    
        return image, target

In [14]:
transform = A.Compose([
    
    # Дзеркально відображає зобреження, щоб в подальшому модель звикала до симетрії(обʼєкт може бути як ліворуч так і праворуч)
    A.HorizontalFlip(p=0.5),
    # Перевертає зображення щоб якщо обʼєкти були перевернутими, або нахиленимим, модель всеодно їх впізнавала
    A.Rotate(limit=15, p=0.5),
    # Допомагає моделі розпізнавати обʼєкти не залежно від умов освітлення
    A.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.2, p=0.5),
    # моделі в PyTorch очікують вхідні дані у вигляді тензорів
    ToTensorV2(p=1.0),
], bbox_params=A.BboxParams(format='albumentations', label_fields=['labels']))

dataset = FruitDataset(data_dir='./datasets/train_zip/train', transforms=transform, image_size=(512, 512))

dataloader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

for images, targets in dataloader:
    print(images[0].shape, targets[0]['boxes'])

torch.Size([3, 512, 512]) tensor([[0.2032, 0.0000, 0.9268, 0.7233],
        [0.5275, 0.4868, 1.0000, 0.9743]])
torch.Size([3, 512, 512]) tensor([[0.1287, 0.3500, 0.8188, 0.8067]])
torch.Size([3, 512, 512]) tensor([[0.2149, 0.1188, 0.5026, 0.4992],
        [0.4612, 0.1452, 0.7340, 0.5370],
        [0.5122, 0.4523, 0.8327, 0.8599]])
torch.Size([3, 512, 512]) tensor([[0.2931, 0.0000, 0.9514, 0.3156]])
torch.Size([3, 512, 512]) tensor([[0.0158, 0.1696, 0.8481, 0.9178],
        [0.3989, 0.1948, 0.8090, 0.8462]])
torch.Size([3, 512, 512]) tensor([[0.3070, 0.1860, 0.7578, 0.6324],
        [0.2309, 0.5353, 0.6614, 0.9070],
        [0.7102, 0.6140, 0.9878, 0.9460],
        [0.5756, 0.2344, 0.9358, 0.6268],
        [0.0022, 0.0280, 0.4305, 0.5052]])




torch.Size([3, 512, 512]) tensor([[0.0833, 0.1400, 0.9167, 0.9900]])
torch.Size([3, 512, 512]) tensor([[0.0830, 0.0672, 0.9750, 0.8857]])
torch.Size([3, 512, 512]) tensor([[0.0249, 0.1768, 0.6795, 0.9972]])
torch.Size([3, 512, 512]) tensor([[0.0257, 0.1812, 0.5213, 0.8900],
        [0.5008, 0.1767, 0.8586, 0.6693],
        [0.3263, 0.4699, 0.7141, 0.9636]])
torch.Size([3, 512, 512]) tensor([[0.4571, 0.2979, 0.8496, 0.8917],
        [0.0695, 0.2604, 0.5108, 0.9038]])
torch.Size([3, 512, 512]) tensor([[0.6325, 0.4916, 0.8375, 0.7730],
        [0.5662, 0.0826, 0.8050, 0.4503],
        [0.4212, 0.0732, 0.5788, 0.7992],
        [0.1963, 0.2946, 0.5263, 0.9099],
        [0.3200, 0.0713, 0.5125, 0.7786]])
torch.Size([3, 512, 512]) tensor([[0.0217, 0.1745, 0.6200, 0.9445],
        [0.4817, 0.1388, 0.9836, 0.8177]])
torch.Size([3, 512, 512]) tensor([[0.3857, 0.0086, 0.9386, 0.5600],
        [0.0529, 0.2143, 0.2657, 0.6771],
        [0.1771, 0.3486, 0.9514, 0.8886]])
torch.Size([3, 512, 512]) te



tensor([[0.0090, 0.2392, 0.6103, 1.0000],
        [0.3910, 0.0911, 0.9962, 1.0000]])
torch.Size([3, 512, 512]) tensor([[0.1015, 0.1926, 0.5413, 0.8410],
        [0.4743, 0.1141, 0.9318, 0.7663]])
torch.Size([3, 512, 512]) tensor([[0.1800, 0.2080, 0.8520, 0.8880]])
torch.Size([3, 512, 512]) tensor([[0.3075, 0.2198, 0.9410, 0.9462]])
torch.Size([3, 512, 512]) tensor([[0.0636, 0.0818, 0.9727, 0.9364]])
torch.Size([3, 512, 512]) tensor([[0.4891, 0.1785, 0.7469, 0.4061],
        [0.1984, 0.1727, 0.5000, 0.4329],
        [0.1141, 0.4271, 0.9656, 0.7223]])
torch.Size([3, 512, 512]) tensor([[0., 0., 1., 1.]])
torch.Size([3, 512, 512]) tensor([[0.1760, 0.1072, 0.8258, 0.9156],
        [0.0675, 0.1666, 0.6538, 1.0000],
        [0.2377, 0.0656, 0.8706, 0.6944]])




torch.Size([3, 512, 512]) tensor([[0.4104, 0.3447, 1.0000, 1.0000]])
torch.Size([3, 512, 512]) tensor([[0.0077, 0.3471, 0.9369, 0.8168]])
torch.Size([3, 512, 512]) tensor([[0.2896, 0.1253, 0.9035, 0.9896]])
torch.Size([3, 512, 512]) tensor([[0.0015, 0.2375, 0.7755, 0.9389]])
torch.Size([3, 512, 512]) tensor([[0.2517, 0.1491, 0.9067, 0.9814]])
torch.Size([3, 512, 512]) tensor([[0.0785, 0.6733, 0.9607, 0.9030]])
torch.Size([3, 512, 512]) tensor([[0.1463, 0.2129, 0.4642, 0.7479],
        [0.5884, 0.3595, 0.8821, 0.7905],
        [0.2653, 0.4634, 0.5568, 0.8756],
        [0.7358, 0.3152, 0.9884, 0.7717]])
torch.Size([3, 512, 512]) tensor([[0.0460, 0.1100, 0.9160, 0.9940]])
torch.Size([3, 512, 512]) tensor([[0.1299, 0.2346, 0.4851, 0.7106],
        [0.5179, 0.2175, 0.9000, 0.6815]])
torch.Size([3, 512, 512]) tensor([[0.0000, 0.0000, 0.6519, 1.0000]])
torch.Size([3, 512, 512]) tensor([[0.0270, 0.0335, 0.9717, 0.9411]])
torch.Size([3, 512, 512]) tensor([[0.1425, 0.1823, 0.8798, 1.0000]])
torc

In [15]:
for images, targets in train_loader:
    images = torch.stack(images).to(device)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    
    bbox_preds, class_preds = model(images)

    print(f"Розмірність передбачених bounding boxes: {bbox_preds.shape}")
    print(f"Розмірність реальних bounding boxes: {[t['boxes'].shape for t in targets]}")

    bboxes = torch.cat([t['boxes'] for t in targets], dim=0)
    labels = torch.cat([t['labels'] for t in targets], dim=0)

    print(f"Зведена розмірність реальних bounding boxes: {bboxes.shape}")
    print(f"Зведена розмірність передбачених bounding boxes: {bbox_preds.shape}")
    
    if bbox_preds.shape == bboxes.shape:
        bbox_loss = bbox_loss_fn(bbox_preds, bboxes)
        class_loss = class_loss_fn(class_preds, labels)
        loss = bbox_loss + class_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    else:
        print("Розмірності не збігаються.")
        break

NameError: name 'train_loader' is not defined

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

class AlexNetObjectDetectorWithAnchors(nn.Module):
    def __init__(self, num_classes, num_anchors=9):
        super(AlexNetObjectDetectorWithAnchors, self).__init__()
        
        self.feature_extractor = models.alexnet(pretrained=True).features
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout()
        )

        self.bbox_head = nn.Linear(4096, num_anchors * 4)
        
        self.class_head = nn.Linear(4096, num_anchors * num_classes)

        self.num_classes = num_classes
        self.num_anchors = num_anchors

    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.avgpool(x)
        x = self.fc(x)

        bbox_coords = self.bbox_head(x)
        bbox_coords = bbox_coords.view(-1, self.num_anchors, 4)

        class_scores = self.class_head(x)
        class_scores = class_scores.view(-1, self.num_anchors, self.num_classes)
        return bbox_coords, class_scores

num_classes = 4 
model = AlexNetObjectDetectorWithAnchors(num_classes=num_classes, num_anchors=9)

dummy_input = torch.randn(1, 3, 512, 512) 
bbox_coords, class_scores = model(dummy_input)

print("Bounding Box Coordinates:", bbox_coords)
print("Class Scores:", class_scores)


In [9]:
import torch
import torch.optim as optim
import torch.nn.functional as F

num_epochs = 20
learning_rate = 0.001

num_classes = 4 
num_anchors = 9 
model = AlexNetObjectDetectorWithAnchors(num_classes=num_classes, num_anchors=num_anchors)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

bbox_loss_fn = nn.MSELoss() 
class_loss_fn = nn.CrossEntropyLoss() 

for epoch in range(num_epochs):
    model.train()
    total_bbox_loss = 0.0
    total_class_loss = 0.0
    
    for images, targets in train_loader:
        images = torch.stack(images).to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        bbox_preds, class_preds = model(images)
        
        batch_bbox_loss = 0.0
        batch_class_loss = 0.0

        for i, target in enumerate(targets):
            num_boxes = target['boxes'].shape[0]
            if num_boxes > num_anchors:
                bboxes = target['boxes'][:num_anchors]
                labels = target['labels'][:num_anchors]
            else:
                padding_boxes = torch.zeros((num_anchors - num_boxes, 4), device=device)
                padding_labels = torch.zeros(num_anchors - num_boxes, dtype=torch.long, device=device)
                bboxes = torch.cat([target['boxes'], padding_boxes], dim=0)
                labels = torch.cat([target['labels'], padding_labels], dim=0)
                
            batch_bbox_loss += bbox_loss_fn(bbox_preds[i], bboxes)
            batch_class_loss += class_loss_fn(class_preds[i], labels)
        
        batch_bbox_loss /= len(targets)
        batch_class_loss /= len(targets)

        loss = batch_bbox_loss + batch_class_loss
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_bbox_loss += batch_bbox_loss.item()
        total_class_loss += batch_class_loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], BBox Loss: {total_bbox_loss/len(train_loader):.4f}, Class Loss: {total_class_loss/len(train_loader):.4f}")

print("Тренування завершено.")



NameError: name 'AlexNetObjectDetectorWithAnchors' is not defined

In [81]:
import torch
from sklearn.metrics import accuracy_score

def evaluate_model(model, test_loader, device):
    model.eval() 
    total_bbox_loss = 0.0
    total_class_accuracy = 0.0
    total_samples = 0
    
    bbox_loss_fn = nn.MSELoss() 
    all_true_labels = []
    all_pred_labels = []

    with torch.no_grad(): 
        for images, targets in test_loader:
            images = torch.stack(images).to(device)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            bbox_preds, class_preds = model(images)

            batch_bbox_loss = 0.0
            batch_class_accuracy = 0.0

            for i, target in enumerate(targets):
                num_boxes = target['boxes'].shape[0]
                if num_boxes > num_anchors:
                    bboxes = target['boxes'][:num_anchors]
                    labels = target['labels'][:num_anchors]
                else:
                    padding_boxes = torch.zeros((num_anchors - num_boxes, 4), device=device)
                    padding_labels = torch.zeros(num_anchors - num_boxes, dtype=torch.long, device=device)
                    bboxes = torch.cat([target['boxes'], padding_boxes], dim=0)
                    labels = torch.cat([target['labels'], padding_labels], dim=0)
                
                batch_bbox_loss += bbox_loss_fn(bbox_preds[i], bboxes).item()

                _, predicted_classes = torch.max(class_preds[i], 1)
                all_true_labels.extend(labels.cpu().numpy())
                all_pred_labels.extend(predicted_classes.cpu().numpy())

            total_bbox_loss += batch_bbox_loss / len(targets)
            total_samples += 1

    avg_bbox_loss = total_bbox_loss / total_samples

    class_accuracy = accuracy_score(all_true_labels, all_pred_labels)

    print(f"Тестова BBox Loss: {avg_bbox_loss:.4f}")
    print(f"Точність класифікації: {class_accuracy * 100:.2f}%")

test_dataset = FruitDataset(data_dir='/Users/matvejzasadko/Downloads/All/Study/NNetworks/Lb1/archive/test_zip/test', transforms=transform, image_size=(512, 512))

test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))
evaluate_model(model, test_loader, device)


Тестова BBox Loss: 0.0672
Точність класифікації: 61.11%




In [9]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision.ops import roi_pool

class RCNN(nn.Module):
    def __init__(self, num_classes, num_rois=9):
        super(RCNN, self).__init__()
        
        self.feature_extractor = models.alexnet(pretrained=True).features
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout()
        )
        
        self.bbox_head = nn.Linear(4096, num_rois * 4)
        
        self.class_head = nn.Linear(4096, num_rois * num_classes)
        
        self.num_classes = num_classes
        self.num_rois = num_rois

    def forward(self, x, rois):
        features = self.feature_extractor(x)
        
        pooled_features = roi_pool(features, rois, output_size=(6, 6))
        
        x = self.avgpool(pooled_features)
        x = self.fc(x)

        bbox_coords = self.bbox_head(x)
        bbox_coords = bbox_coords.view(-1, self.num_rois, 4)

        class_scores = self.class_head(x)
        class_scores = class_scores.view(-1, self.num_rois, self.num_classes)
        
        return bbox_coords, class_scores

num_classes = 4 
num_rois = 9
model = RCNN(num_classes=num_classes, num_rois=num_rois)

dummy_input = torch.randn(1, 3, 512, 512) 
dummy_rois = torch.tensor([[0, 50, 50, 400, 400]], dtype=torch.float)  # Example ROI [batch_index, x1, y1, x2, y2]

bbox_coords, class_scores = model(dummy_input, dummy_rois)

print("Bounding Box Coordinates:", bbox_coords)
print("Class Scores:", class_scores)




Bounding Box Coordinates: tensor([[[-1.7623e-02, -1.4542e-03, -7.1488e-03, -6.4496e-03],
         [-1.8502e-02, -3.9880e-03, -2.2023e-02,  1.1719e-02],
         [ 1.3590e-02,  8.3991e-03, -5.2298e-04, -1.2157e-03],
         [ 4.5781e-03,  1.6443e-02, -1.7070e-03, -1.1228e-02],
         [ 1.3366e-02,  1.3399e-04, -1.4536e-03, -6.0596e-03],
         [-2.0067e-02, -8.9800e-05, -8.4977e-03,  6.0467e-03],
         [-9.0072e-03, -1.5734e-02, -1.2572e-02,  1.3781e-02],
         [ 1.2443e-02,  7.8619e-04, -9.0144e-04, -2.1730e-03],
         [-1.3206e-02,  1.2758e-02, -1.1566e-02, -8.8705e-03]]],
       grad_fn=<ViewBackward0>)
Class Scores: tensor([[[ 0.0016,  0.0119, -0.0045,  0.0083],
         [ 0.0141, -0.0004, -0.0059, -0.0159],
         [-0.0107,  0.0076, -0.0028,  0.0066],
         [-0.0173,  0.0098,  0.0153,  0.0020],
         [ 0.0084, -0.0009,  0.0077, -0.0178],
         [ 0.0061,  0.0053,  0.0114, -0.0139],
         [ 0.0076,  0.0196, -0.0102, -0.0050],
         [ 0.0133, -0.0023, -0

In [14]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn

num_epochs = 10
learning_rate = 0.001

num_classes = 4 
num_rois = 9
model = RCNN(num_classes=num_classes, num_rois=num_rois)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

bbox_loss_fn = nn.MSELoss() 
class_loss_fn = nn.CrossEntropyLoss() 

for epoch in range(num_epochs):
    model.train()
    total_bbox_loss = 0.0
    total_class_loss = 0.0
    
    for images, targets in dataloader: 
        images = torch.stack(images).to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        rois = []
        for idx in range(len(images)):
            roi = torch.tensor([[idx, 50, 50, 400, 400],
                                [idx, 100, 100, 300, 300]], dtype=torch.float, device=device)
            rois.append(roi)
        
        rois = torch.cat(rois, dim=0) 
        
        bbox_preds, class_preds = model(images, rois)
        
        batch_bbox_loss = 0.0
        batch_class_loss = 0.0

        for i, target in enumerate(targets):
            num_boxes = target['boxes'].shape[0]
            if num_boxes > num_rois:
                bboxes = target['boxes'][:num_rois]
                labels = target['labels'][:num_rois]
            else:
                padding_boxes = torch.zeros((num_rois - num_boxes, 4), device=device)
                padding_labels = torch.zeros(num_rois - num_boxes, dtype=torch.long, device=device)
                bboxes = torch.cat([target['boxes'], padding_boxes], dim=0)
                labels = torch.cat([target['labels'], padding_labels], dim=0)
                
            batch_bbox_loss += bbox_loss_fn(bbox_preds[i], bboxes)
            batch_class_loss += class_loss_fn(class_preds[i], labels)
        
        batch_bbox_loss /= len(targets)
        batch_class_loss /= len(targets)

        loss = batch_bbox_loss + batch_class_loss
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_bbox_loss += batch_bbox_loss.item()
        total_class_loss += batch_class_loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], BBox Loss: {total_bbox_loss/len(dataloader):.4f}, Class Loss: {total_class_loss/len(dataloader):.4f}")

print("Тренування завершено.")




Epoch [1/10], BBox Loss: 0.0464, Class Loss: 0.6007




Epoch [2/10], BBox Loss: 0.0339, Class Loss: 0.4911




Epoch [3/10], BBox Loss: 0.0352, Class Loss: 0.4882




Epoch [4/10], BBox Loss: 0.0333, Class Loss: 0.4894




Epoch [5/10], BBox Loss: 0.0318, Class Loss: 0.4876




Epoch [6/10], BBox Loss: 0.0315, Class Loss: 0.4844




Epoch [7/10], BBox Loss: 0.0326, Class Loss: 0.4906




Epoch [8/10], BBox Loss: 0.0307, Class Loss: 0.4809




Epoch [9/10], BBox Loss: 0.0312, Class Loss: 0.4903




Epoch [10/10], BBox Loss: 0.0331, Class Loss: 0.4782
Тренування завершено.


In [16]:
from sklearn.metrics import accuracy_score

test_dataset = FruitDataset(data_dir='/Users/matvejzasadko/Downloads/All/Study/NNetworks/Lb1/archive/test_zip/test', 
                            transforms=transform, 
                            image_size=(512, 512))

test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

def evaluate_model(model, test_loader, device, num_rois=9):
    model.eval()  
    total_bbox_loss = 0.0
    all_true_labels = []
    all_pred_labels = []

    bbox_loss_fn = nn.MSELoss() 

    with torch.no_grad():  
        for images, targets in test_loader:
            images = torch.stack(images).to(device)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            rois = []
            for idx in range(len(images)):
                roi = torch.tensor([[idx, 50, 50, 400, 400],
                                    [idx, 100, 100, 300, 300]], dtype=torch.float, device=device)
                rois.append(roi)
            
            rois = torch.cat(rois, dim=0)  

            bbox_preds, class_preds = model(images, rois)

            batch_bbox_loss = 0.0

            for i, target in enumerate(targets):
                num_boxes = target['boxes'].shape[0]
                if num_boxes > num_rois:
                    bboxes = target['boxes'][:num_rois]
                    labels = target['labels'][:num_rois]
                else:
                    padding_boxes = torch.zeros((num_rois - num_boxes, 4), device=device)
                    padding_labels = torch.zeros(num_rois - num_boxes, dtype=torch.long, device=device)
                    bboxes = torch.cat([target['boxes'], padding_boxes], dim=0)
                    labels = torch.cat([target['labels'], padding_labels], dim=0)

                batch_bbox_loss += bbox_loss_fn(bbox_preds[i], bboxes).item()

                _, predicted_classes = torch.max(class_preds[i], 1)
                all_true_labels.extend(labels.cpu().numpy())
                all_pred_labels.extend(predicted_classes.cpu().numpy())

            total_bbox_loss += batch_bbox_loss / len(targets)

    avg_bbox_loss = total_bbox_loss / len(test_loader)

    class_accuracy = accuracy_score(all_true_labels, all_pred_labels)

    print(f"Тестова BBox Loss: {avg_bbox_loss:.4f}")
    print(f"Точність класифікації: {class_accuracy * 100:.2f}%")

evaluate_model(model, test_loader, device)


Тестова BBox Loss: 0.0812
Точність класифікації: 61.30%


