In [1]:
import torch
import os
import numpy as np
from tqdm import tqdm
from datetime import datetime
from torch.utils.data import DataLoader
from custom_data import CustomYoloDataset
from loss import SumSquaredErrorLoss
from models import *

In [2]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Bare expression so the notebook echoes the chosen device as the cell output.
device

device(type='cuda')

In [3]:
# Anomaly detection makes autograd raise on backward passes that produce NaN and
# report the forward operation responsible. The PyTorch docs recommend it for
# debugging only, because the extra checks slow execution.
torch.autograd.set_detect_anomaly(True)
# Timestamp of this run; the output-folder cell derives its directory names from it.
now = datetime.now()

# NOTE(review): `config` is not imported explicitly in this notebook; presumably it
# is re-exported by `from models import *` -- confirm, or add an explicit import.
model = YOLOv1()
model.to(device)
loss_function = SumSquaredErrorLoss()

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=config.LEARNING_RATE
)

# Dataset directory
# Overridable through the YOLO_DATASET_DIR environment variable so the notebook can
# run on other machines; falls back to the original local path when it is unset.
# NOTE: the name `dir` shadows the builtin of the same name; it is kept unchanged
# because other cells may reference it.
dir = os.environ.get(
    "YOLO_DATASET_DIR",
    r"C:\Users\alkan\.cache\kagglehub\datasets\a2015003713\militaryaircraftdetectiondataset\versions\87\dataset",
)
# The same directory is passed as both the second and third positional argument.
# NOTE(review): the test split is also built with augment=True; if that flag applies
# random augmentation, the reported test loss will be noisy -- confirm in
# CustomYoloDataset.
# NOTE(review): the recorded run of the training cell failed on its first batch with
# a [1, 448, 448] vs [3, 448, 448] broadcast error; presumably a single-channel image
# reaching a 3-channel normalisation inside the dataset -- verify in custom_data.
train_set = CustomYoloDataset("train", dir, dir, normalize=True, augment=True)
test_set = CustomYoloDataset("test", dir, dir, normalize=True, augment=True)

# drop_last=True discards a trailing incomplete batch in both loaders; only the
# training loader shuffles.
train_loader = DataLoader(
    train_set,
    batch_size=config.BATCH_SIZE,
    drop_last=True,
    shuffle=True
)
test_loader = DataLoader(
    test_set,
    batch_size=config.BATCH_SIZE,
    drop_last=True
)

In [4]:
# Sanity check: report whether CUDA is usable and, if so, the name of device 0.
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA device:", torch.cuda.get_device_name(0))
else:
    print("CUDA device:", "No CUDA device")


CUDA available: True
CUDA device: NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [4]:
import torch
# Print the CUDA version string and the cuDNN version number reported by PyTorch,
# one per line.
print(torch.version.cuda, torch.backends.cudnn.version(), sep="\n")

12.4
90100


In [6]:
# Show which device the model's first parameter tensor lives on, as a check that
# the earlier `model.to(device)` call took effect.
print(next(iter(model.parameters())).device)

cuda:0


In [None]:


# Output folder: models/yolo_v1/<MM_DD_YYYY>/<HH_MM_SS>, with both components
# formatted from the `now` timestamp captured during setup.
root = os.path.join('models', 'yolo_v1', now.strftime('%m_%d_%Y'), now.strftime('%H_%M_%S'))
weight_dir = os.path.join(root, 'weights')
# exist_ok=True makes re-running this cell a no-op and removes the gap between
# checking for the directory and creating it. Intermediate directories (including
# `root`) are created as needed.
os.makedirs(weight_dir, exist_ok=True)

# Metrics: four independent, initially empty 2 x 0 arrays. The training cell appends
# one [[epoch], [loss]] column per record to the two loss histories; the two error
# histories are allocated and saved but not appended to in the visible cells.
train_losses, test_losses, train_errors, test_errors = (
    np.empty((2, 0)) for _ in range(4)
)

def save_metrics():
    """Write the four metric histories to disk under the run's `root` folder.

    Reads the module-level arrays `train_losses`, `test_losses`, `train_errors`
    and `test_errors` at call time and stores each one with `np.save`, using the
    variable name as the file name (NumPy appends the `.npy` extension).
    Existing files from an earlier call are overwritten.
    """
    histories = (
        ('train_losses', train_losses),
        ('test_losses', test_losses),
        ('train_errors', train_errors),
        ('test_errors', test_errors),
    )
    for file_stem, values in histories:
        np.save(os.path.join(root, file_stem), values)

In [5]:
#####################
#       Train       #
#####################
# Runs WARMUP_EPOCHS + EPOCHS passes over the training loader. On every epoch
# divisible by 4 the model is also scored on the test loader and the metric
# histories are written to disk. The weights are saved once, after the last epoch.
for epoch in range(config.WARMUP_EPOCHS + config.EPOCHS):
    model.train()
    train_loss = 0
    # Each batch yields three items; the third is unused here.
    for data, labels, _ in tqdm(train_loader, desc='Train', leave=False):
        data, labels = data.to(device), labels.to(device)

        # Clear old gradients, run forward and backward, then update the weights.
        optimizer.zero_grad()
        predictions = model(data)
        loss = loss_function(predictions, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the running mean of the per-batch loss for this epoch.
        train_loss += loss.item() / len(train_loader)
        del data, labels

    # Record this epoch as a new [[epoch], [loss]] column in the training history.
    train_losses = np.concatenate((train_losses, [[epoch], [train_loss]]), axis=1)
    print(f"[Epoch {epoch}] Train Loss: {train_loss:.4f}")

    # Evaluation only happens on epochs 0, 4, 8, ...; skip the rest.
    if epoch % 4 != 0:
        continue

    model.eval()
    test_loss = 0
    # Gradients are not needed while scoring the test loader.
    with torch.no_grad():
        for data, labels, _ in tqdm(test_loader, desc='Test', leave=False):
            data, labels = data.to(device), labels.to(device)

            predictions = model(data)
            loss = loss_function(predictions, labels)

            test_loss += loss.item() / len(test_loader)
    test_losses = np.concatenate((test_losses, [[epoch], [test_loss]]), axis=1)
    print(f"[Epoch {epoch}] Test Loss: {test_loss:.4f}")
    save_metrics()

# Final flush of the metric histories, then store the trained weights as 'final'.
save_metrics()
torch.save(model.state_dict(), os.path.join(weight_dir, 'final'))

Train:   0%|          | 0/280 [00:00<?, ?it/s]

                                              

RuntimeError: output with shape [1, 448, 448] doesn't match the broadcast shape [3, 448, 448]