In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported automatically after their source files change (no kernel restart
# needed while editing the project's src/ modules).
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image
import torchvision.models as models
from tqdm import tqdm

In [None]:
from src.yolov8 import *
from src.yoloTrain import *
from src.yoloDataset import *
from src.yoloLoss import *
from src.utils import *

In [3]:
# Prepare the dataset on disk. Judging by the messages this call logs, it
# downloads the archive (or reuses a cached copy), unzips it and reorganizes the
# files. The later cells read from 'datasetoviraw/datasetyolo.zip_unzip/...'.
# NOTE(review): the exact meaning of the two arguments (presumably target
# directory and archive name) lives in src/datasetSetup.py - confirm there.
from src.datasetSetup import srediDataset
srediDataset("datasetoviraw", "datasetyolo.zip")

File downloaded or caches
File unziped
Dataset je uspesno organizovan
Finished dataset customization


In [4]:
# Convolutional stack passed to the detector as `features`: three 3x3
# conv + ReLU stages that narrow 512 -> 256 -> 128 -> 64 channels while keeping
# the spatial size (stride 1, padding 1), followed by a 2x2 max-pool that halves
# height and width.
features_yolo = nn.Sequential(
    *[
        layer
        for channels_in, channels_out in ((512, 256), (256, 128), (128, 64))
        for layer in (
            nn.Conv2d(channels_in, channels_out, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
        )
    ],
    nn.MaxPool2d(kernel_size=2, stride=2),
)
def collate_fn(batch):
    """Collate dataset samples into a list of images and one stacked target tensor.

    Args:
        batch: Sequence of ``(image, target)`` pairs. ``target[0]`` holds the
            box data as a 4-D array indexed ``[:, :, 0, :]`` and ``target[1]``
            holds the label data; each may be a tensor, a NumPy array or a
            nested list. The training log suggests shapes ``(S, S, B, 5)`` and
            ``(S, S, C)`` - confirm against ``YoloV8Dataset``.

    Returns:
        tuple: ``(images, target_tensors)`` where ``images`` is a plain list of
        the per-sample images (not stacked) and ``target_tensors`` is a float32
        tensor in which, for every sample, boxes and labels are concatenated
        along dim 2 and the samples are stacked along a new leading batch dim.
    """
    images, targets = zip(*batch)
    images = list(images)

    target_tensors = []
    for target in targets:
        # as_tensor converts without the copy-construct UserWarning that
        # torch.tensor() raises when the input is already a tensor.
        # Only the first box slot of every grid cell is kept.
        boxes = torch.as_tensor(target[0], dtype=torch.float32)[:, :, 0, :]
        # Labels are truncated to integers first (as before), then cast to the
        # box dtype so the concatenation does not rely on implicit promotion.
        labels = torch.as_tensor(target[1], dtype=torch.int64).to(boxes.dtype)
        target_tensors.append(torch.cat((boxes, labels), dim=2))

    target_tensors = torch.stack(target_tensors)

    return images, target_tensors

In [5]:
# Image and label folders of the training split inside the unzipped dataset.
train_img_dir = 'datasetoviraw/datasetyolo.zip_unzip/train/images'
train_labels_dir = 'datasetoviraw/datasetyolo.zip_unzip/train/labels'

# NOTE(review): transform_yolo and batch_size_yolo are not defined in any
# visible cell; presumably they arrive through the `from src.* import *` lines.
train_dataset = YoloV8Dataset(
    train_img_dir,
    train_labels_dir,
    transform=transform_yolo,
)

# Shuffled batches, loaded in the main process (num_workers=0) and assembled by
# the notebook's own collate_fn instead of the default collation.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size_yolo,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn,
)

In [6]:
# Build the detector around the convolutional stack defined earlier.
model = YOLOv8(features=features_yolo)
# NOTE(review): this optimizer only updates model.fc_layers, but `optimizer` is
# rebound in a later cell to Adam over model.parameters(), so this instance is
# never used when the notebook runs top to bottom. Confirm which is intended.
optimizer = optim.Adam(model.fc_layers.parameters(), lr=0.001)



In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU, then move
# the model there and show which device was picked.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
print(device)

In [None]:
# Hyperparameters for the detector and its loss.
# NOTE(review): this cell has no execution count and sits after the cells that
# build the model. Of these names only `learning_rate` is referenced in the
# visible cells; the loss constructors repeat 7 / 1 / 4 as literals instead of
# using the constants below.
feature_size_yolo = 7  # grid size; the logged box tensor is 7 x 7 x 1 x 5
num_bboxes_yolo = 1 # boxes per grid cell (third dim of the logged box tensor)
# NOTE(review): the logged class tensor has 10 channels, not 4 - confirm the class count.
num_classes_yolo = 4 
# lambda_coord / lambda_noobj are not passed to YOLOv8Loss anywhere in the visible cells.
lambda_coord = 5.0
lambda_noobj = 0.5
learning_rate = 1e-3

In [8]:
# ResNet-18 with pretrained weights, with its last two child modules dropped.
# NOTE(review): `backbone` is not passed to the model or used in any visible
# cell (the model was built from features_yolo alone) - confirm it is wired in
# elsewhere. `pretrained=True` is deprecated in recent torchvision releases in
# favour of the `weights=` argument.
backbone = models.resnet18(pretrained=True)
backbone = nn.Sequential(*list(backbone.children())[:-2]) 

# Rebinds `optimizer` to Adam over every model parameter, replacing the earlier
# fc_layers-only optimizer.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# NOTE(review): `yolo_loss` is not used in the visible cells; the training cell
# constructs an identical loss under the name `criterion`.
yolo_loss = YOLOv8Loss(feature_size=7, num_bboxes=1, num_classes=4, lambda_box=1.0, lambda_cls=1.0, lambda_df=1.0, phi=0.0005) 

In [10]:
# Loss used for training; same configuration as the `yolo_loss` built in the
# previous cell.
# NOTE(review): num_classes=4 here, while the logged class tensor has 10
# channels - confirm the class count matches the dataset.
criterion = YOLOv8Loss(feature_size=7, num_bboxes=1, num_classes=4, lambda_box=1.0, lambda_cls=1.0, lambda_df=1.0, phi=0.0005)

# One call to train() per epoch; its return value is reported as the epoch loss.
# NOTE(review): num_epochs_yolo is not defined in any visible cell (presumably
# a star import). The recorded run of this cell ends in
# "ValueError: too many values to unpack (expected 2)"; the traceback is
# truncated, so the failing frame must be located by re-running.
for epoch in range(num_epochs_yolo):
    epoch_loss = train(model, optimizer, train_loader, device, criterion)
    print(f'Epoch {epoch+1}/{num_epochs_yolo}, Loss: {epoch_loss:.4f}')

Training Epoch:   0%|          | 0/77 [00:00<?, ?it/s]

Box tensor shape: torch.Size([7, 7, 1, 5])
Class tensor shape: torch.Size([7, 7, 10])
Box tensor: tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]],


        [[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.5404, 0.2477, 0.0089, 0.0250, 1.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]],


        [[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.2635, 0.3560, 0.0073, 0.0361, 1.0000]],

         [[0.4086, 0.3778, 0.0120,

Training Epoch:   0%|          | 0/77 [00:00<?, ?it/s]

Box tensor shape: torch.Size([7, 7, 1, 5])
Class tensor shape: torch.Size([7, 7, 10])
Box tensor: tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]],


        [[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.3065, 0.2611, 0.0089, 0.0296, 1.0000]],

         [[0.4729, 0.2569, 0.0083, 0.0287, 1.0000]],

         [[0.5943, 0.2620, 0.0052, 0.0259, 1.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]],


        [[[0.0969, 0.3301, 0.0083, 0.0361, 1.0000]],

         [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],

         [[0.3799, 0.2898, 0.0068,




ValueError: too many values to unpack (expected 2)