[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/khetansarvesh/CV/blob/main/YOLO/v1/runner.ipynb)

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as FT
from torch.utils.data import DataLoader
from tqdm import tqdm

from utils import (
    non_max_suppression,
    mean_average_precision,
    intersection_over_union,
    cellboxes_to_boxes,
    get_bboxes,
    plot_image,
    save_checkpoint,
    load_checkpoint,
)

In [None]:
# `is_available` has to be *called*: the bare function object is always
# truthy, which would select "cuda" even on machines without a GPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset

In [None]:
from dataset import VOCDataset

In [None]:
class Compose:
    """Chain image transforms while carrying bounding boxes along untouched.

    Every callable in ``transforms`` is applied to the image only, in order;
    the boxes are handed back exactly as they were passed in.
    """

    def __init__(self, transforms):
        # Stored as given; it is iterated on every call.
        self.transforms = transforms

    def __call__(self, img, bboxes):
        """Return ``(transformed_img, bboxes)``."""
        for apply_step in self.transforms:
            img = apply_step(img)
        return img, bboxes

# Resize every image to 448x448, then convert it to a tensor.
transform = Compose(
    [
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
    ]
)

In [None]:
# Training split: samples listed in data/100examples.csv.
train_dataset = VOCDataset(
    "data/100examples.csv",
    transform=transform,
    img_dir="data/images",
    label_dir="data/labels",
)

# Shuffled batches of 16; drop_last discards a trailing partial batch.
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    drop_last=True,
)


In [None]:
# Test split: samples listed in data/test.csv, same transform and folders
# as the training split.
test_dataset = VOCDataset(
    "data/test.csv",
    transform=transform,
    img_dir="data/images",
    label_dir="data/labels",
)

# NOTE(review): shuffle=True and drop_last=True mirror the training loader;
# for evaluation, drop_last skips a trailing partial batch — confirm this is
# intended.
test_loader = DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    drop_last=True,
)

# Modelling

In [None]:
class YOLOV1(nn.Module):
    """YOLOv1-style detector: ResNet-34 feature extractor plus a YOLO head.

    Args:
        S: Number of grid cells per side; the output covers ``S * S`` cells.
        B: Number of boxes predicted per cell (each contributes 5 values).
        C: Number of class scores predicted per cell.

    The defaults (7, 2, 20) reproduce the previously hard-coded output width
    of ``7 * 7 * (5 * 2 + 20)`` values per image.
    """

    def __init__(self, S=7, B=2, C=20):
        super().__init__()
        self.S = S
        self.B = B
        self.C = C

        # ImageNet-pretrained ResNet-34; only the convolutional stages are
        # kept (its pooling/classifier layers are not part of `features`).
        backbone = torchvision.models.resnet34(
            weights=torchvision.models.ResNet34_Weights.IMAGENET1K_V1
        )
        self.features = nn.Sequential(
            backbone.conv1,
            backbone.bn1,
            backbone.relu,
            backbone.maxpool,
            backbone.layer1,
            backbone.layer2,
            backbone.layer3,
            backbone.layer4,
        )

        # Four Conv -> BatchNorm -> LeakyReLU stages appended to one flat
        # Sequential, so layer indices (and state_dict keys) stay 0..11.
        # The first stage takes the 512-channel ResNet output; the second
        # one halves the spatial resolution (stride 2).
        head_layers = []
        for in_channels, stride in ((512, 1), (1024, 2), (1024, 1), (1024, 1)):
            head_layers += [
                nn.Conv2d(in_channels, 1024, 3, stride=stride, padding=1, bias=False),
                nn.BatchNorm2d(1024),
                nn.LeakyReLU(0.1),
            ]
        self.conv_yolo_layers = nn.Sequential(*head_layers)

        # Spatial size of the conv head's output. This is tied to the input
        # resolution, not to S: it assumes 448x448 images, which the
        # backbone plus the stride-2 conv reduce to 7x7.
        feature_hw = 7
        self.fc_yolo_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(feature_hw * feature_hw * 1024, 4096),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.5),
            nn.Linear(4096, S * S * (5 * B + C)),
        )
        # Alternative to the fully connected head: a 1x1 convolution, e.g.
        # nn.Sequential(nn.Conv2d(1024, 5 * self.B + self.C, 1))

    def forward(self, x):
        """Return flat predictions with ``S * S * (5 * B + C)`` values per image."""
        out = self.features(x)
        out = self.conv_yolo_layers(out)
        out = self.fc_yolo_layers(out)
        return out

In [None]:
# The detector class in this file is spelled YOLOV1; `YoloV1` is undefined
# and would raise NameError.
model = YOLOV1().to(DEVICE)

# Training

In [None]:
import torch.optim as optim
from loss import YoloLoss

# Adam over every model parameter, with no weight decay.
optimizer = optim.Adam(
    model.parameters(),
    lr=2e-5,
    weight_decay=0,
)

# Project-provided YOLO loss, constructed with its default arguments.
loss_fn = YoloLoss()

In [None]:
# Seed PyTorch's random number generator.
# NOTE(review): this runs after the model and the data loaders were created
# earlier in the file, so it presumably does not influence the model's
# initial weights — confirm whether the seed should be set earlier.
seed = 123
torch.manual_seed(seed)

In [None]:
def train_fn(train_loader, model, optimizer, loss_fn):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(x, y)`` batches; both are moved
            to ``DEVICE`` with ``.to``.
        model: Network invoked as ``model(x)``.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        loss_fn: Callable ``loss_fn(predictions, y)`` returning a scalar loss.

    Returns:
        The mean of the per-batch losses, or ``None`` when the loader
        produced no batches.
    """
    # Make sure layers such as Dropout/BatchNorm are in training mode, no
    # matter what state an earlier evaluation step left the model in.
    model.train()

    loop = tqdm(train_loader, leave=True)
    batch_losses = []

    for x, y in loop:
        x, y = x.to(DEVICE), y.to(DEVICE)
        loss = loss_fn(model(x), y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the log and the progress bar.
        loss_value = loss.item()
        batch_losses.append(loss_value)
        loop.set_postfix(loss=loss_value)

    # An empty loader (e.g. drop_last=True with fewer samples than one
    # batch) would otherwise end in a ZeroDivisionError.
    if not batch_losses:
        print("No batches were produced by train_loader; mean loss is undefined")
        return None

    mean_loss = sum(batch_losses) / len(batch_losses)
    print(f"Mean loss was {mean_loss}")
    return mean_loss

In [None]:
for epoch in range(1000):
    # Disabled debugging aid: plot the NMS-filtered predictions for the
    # first 8 images of a training batch, then exit.
    # for x, y in train_loader:
    #    x = x.to(DEVICE)
    #    for idx in range(8):
    #        bboxes = cellboxes_to_boxes(model(x))
    #        bboxes = non_max_suppression(bboxes[idx], iou_threshold=0.5, threshold=0.4, box_format="midpoint")
    #        plot_image(x[idx].permute(1,2,0).to("cpu"), bboxes)
    #
    #    import sys
    #    sys.exit()

    # Report mAP on the training loader before this epoch's weight updates.
    pred_boxes, target_boxes = get_bboxes(
        train_loader,
        model,
        iou_threshold=0.5,
        threshold=0.4,
    )
    mean_avg_prec = mean_average_precision(
        pred_boxes,
        target_boxes,
        iou_threshold=0.5,
        box_format="midpoint",
    )
    print(f"Train mAP: {mean_avg_prec}")

    # One pass over the training data.
    train_fn(train_loader, model, optimizer, loss_fn)