# Pure CNN using VGG16

This notebook is a simple CNN using VGG16 architecture.

In [1]:
# Libraries import
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as F
import numpy as np
import os
import json
from PIL import Image
from pycocotools.coco import COCO


In [None]:
# Colab setup
if 'google.colab' in str(get_ipython()):
    from google.colab import drive
    drive.mount('/content/drive')

    # copy dataset to local dir
    !cp /content/drive/MyDrive/Datasets/YOLO_UODD_dataset.zip ./datasets/YOLO_UODD_dataset.zip
    !unzip -q ./datasets/YOLO_UODD_dataset.zip -d ./datasets

In [7]:
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import CocoDetection

class UODDDataset(Dataset):
    def __init__(self, root, annotation_file, transforms=None):
        self.root = root
        self.transforms = transforms
        with open(annotation_file) as f:
            self.coco = json.load(f)
        self.ids = list(sorted(self.coco['images'], key=lambda x: x['id']))
        # Filter images with annotations
        self.ids = [img for img in self.ids if self.has_annotations(img['id'])]

    def has_annotations(self, img_id):
        ann_ids = [ann for ann in self.coco['annotations'] if ann['image_id'] == img_id]
        return len(ann_ids) > 0

    def __getitem__(self, idx):
        img_id = self.ids[idx]['id']
        img_info = self.ids[idx]
        img_path = os.path.join(self.root, img_info['file_name'])
        img = Image.open(img_path).convert("RGB")

        ann_ids = [ann for ann in self.coco['annotations'] if ann['image_id'] == img_id]
        boxes = []
        labels = []
        for ann in ann_ids:
            xmin, ymin, w, h = ann['bbox']
            boxes.append([xmin, ymin, xmin + w, ymin + h])
            labels.append(ann['category_id'])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.ids)
    

def get_transform():
    transforms = []
    transforms.append(F.to_tensor)
    return torchvision.transforms.Compose(transforms)

# Cell 5: Load data
train_data = UODDDataset('./Datasets/Underwater-object-detection-dataset/imgs/train', './Datasets/Underwater-object-detection-dataset/annotations/coco/instances_train.json', transforms=get_transform())
val_data = UODDDataset('./Datasets/Underwater-object-detection-dataset/imgs/val', './Datasets/Underwater-object-detection-dataset/annotations/coco/instances_val.json', transforms=get_transform())


train_loader = DataLoader(train_data, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_data, batch_size=2, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

In [4]:
print(f"Train dataset: {len(train_data)}")
print(f"Validation dataset: {len(val_data)}")

Train dataset: 2560
Validation dataset: 128


In [5]:
from torchvision.models import VGG16_Weights

backbone = torchvision.models.vgg16(weights=VGG16_Weights.DEFAULT).features
backbone.out_channels = 512

# Define the RPN anchor generator
rpn_anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),) * 1
)

# Define the RoI pooler
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)

# Create the Faster R-CNN model
model = FasterRCNN(backbone, num_classes=5,
                   rpn_anchor_generator=rpn_anchor_generator,
                   box_roi_pool=roi_pooler)

In [6]:
from tqdm import tqdm

# Training setup
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Training function with progress bar and loss printing
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    running_loss = 0.0
    num_batches = len(data_loader)
    progress_bar = tqdm(data_loader, desc=f"Epoch {epoch+1}", unit="batch")

    for images, targets in progress_bar:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        running_loss += losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        progress_bar.set_postfix(loss=losses.item())

    epoch_loss = running_loss / num_batches
    print(f"Epoch {epoch+1} Loss: {epoch_loss:.4f}")

# Training loop with tqdm progress bar
num_epochs = 2
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()

# Save the model
torch.save(model.state_dict(), 'fasterrcnn_vgg16_uodd.pth')

print("Training complete. Model saved as 'fasterrcnn_vgg16_uodd.pth'.")

Epoch 1:  44%|████▍     | 561/1280 [04:55<06:18,  1.90batch/s, loss=0.253] 


AssertionError: Expected target boxes to be a tensor of shape [N, 4], got torch.Size([0]).