In [1]:
from google.colab import drive
drive.mount("/content/drive", force_remount=True)
FOLDERNAME = "Colab\ Notebooks/WeHelp/"

Mounted at /content/drive


In [2]:
%cd drive/MyDrive/$FOLDERNAME

/content/drive/MyDrive/Colab Notebooks/WeHelp


In [3]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision.utils import draw_bounding_boxes
import matplotlib.pyplot as plt
import random
import ast

In [4]:
path = 'data/vehicles_images/'
train = os.path.join(path, "train")
test = os.path.join(path, "test")
category = os.path.join(path, "category.txt")
train_labels = os.path.join(path, "train_labels.csv")
test_labels = os.path.join(path, "test_labels.csv")
output_path = os.path.join(path, "output")
checkpoint = os.path.join(path, "best_model.pth")
os.makedirs(output_path, exist_ok=True)

In [5]:
category_dict = None
with open(category, 'r') as f:
  line = f.read().strip()
  if ':' in line:
    categories_str = line.split(":", 1)[1].strip()
    category_dict = ast.literal_eval(categories_str)
classes = [None] * len(category_dict)
for k, v in category_dict.items():
  classes[v] = k

In [6]:
category_dict

{'Bus': 0, 'Car': 1, 'Motorcycle': 2, 'Pickup': 3, 'Truck': 4}

In [7]:
# Custom dataset
class VehicleDataset(Dataset):
  def __init__(self, img_dir, label_csv, transforms=None):
    self.img_dir = img_dir
    self.df = pd.read_csv(label_csv)
    self.imgs = self.df['filename'].unique().tolist()
    self.transforms = transforms

  def __getitem__(self, idx):
    filename = self.imgs[idx]
    img_path = os.path.join(self.img_dir, filename)
    image = read_image(img_path).float() / 255.0  # Normalize to [0, 1]
    records = self.df[self.df['filename'] == filename]
    boxes = records[['xmin', 'ymin', 'xmax', 'ymax']].values
    labels = records['class'].apply(lambda x: category_dict[x]).values
    boxes = torch.as_tensor(boxes, dtype=torch.float32)
    labels = torch.as_tensor(labels, dtype=torch.int64)
    target = {"boxes": boxes, "labels": labels, "image_id": torch.tensor([idx])}
    if self.transforms:
      image = self.transforms(image)
    return image, target

  def __len__(self):
    return len(self.imgs)

In [8]:
# Load pretrained faster R-CNN
num_classes = len(category_dict)
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
transform = weights.transforms()
model = fasterrcnn_resnet50_fpn(weights=weights)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Enable backbone fine-tuning
for name, param in model.backbone.body.named_parameters():
    param.requires_grad = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [9]:
# Load datasets
train_dataset = VehicleDataset(train, train_labels, transforms=transform)
test_dataset = VehicleDataset(test, test_labels)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

In [11]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

In [12]:
# Training
def train_one_epoch(epoch):
  model.train()
  for imgs, targets in train_loader:
    imgs = [img.to(device) for img in imgs]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    loss_dict = model(imgs, targets)
    losses = sum(loss for loss in loss_dict.values())

    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

  print(f"[Epoch {epoch}] Loss: {losses.item():.4f}")

In [13]:
# Evaluation Function
def evaluate_model():
  model.eval()
  total_score = 0.0
  total_objects = 0

  with torch.no_grad():
    for imgs, _ in test_loader:
      imgs = [img.to(device) for img in imgs]
      outputs = model(imgs)

      for output in outputs:
        scores = output['scores'].cpu()
        if len(scores) > 0:
          total_score += scores.sum().item()
          total_objects += len(scores)

  average_score = total_score / total_objects if total_objects > 0 else 0.0
  return average_score

In [14]:
class_color_map = {
  classes[0]: "red",
  classes[1]: "green",
  classes[2]: "blue",
  classes[3]: "yellow",
  classes[4]: "magenta"
}

In [15]:
# Save predictions on sample images
def save_predictions(n=2):
  model.eval()
  count = 0

  with torch.no_grad():
    for imgs, _ in test_loader:
      imgs = [img.to(device) for img in imgs]
      outputs = model(imgs)

      for img_tensor, output in zip(imgs, outputs):
        img = img_tensor.cpu()
        boxes = output['boxes'].cpu()
        scores = output['scores'].cpu()
        labels_idx = output['labels'].cpu()
        keep = scores > 0.6
        boxes = boxes[keep]
        labels_idx = labels_idx[keep]
        labels = [classes[i] for i in labels_idx]
        colors = [class_color_map.get(label, "white") for label in labels]
        if len(boxes) == 0:
          continue
        img = (img * 255).byte()
        drawn = draw_bounding_boxes(img, boxes, labels=labels, colors=colors, width=3)
        img_np = F.to_pil_image(drawn)

        out_path = os.path.join(output_path, f"prediction_{count}.png")
        img_np.save(out_path)
        print(f"Saved: {out_path}")
        count += 1
        if count >= n:
          return

In [16]:
num_epochs = 50
best_score = 0

for epoch in range(num_epochs):
  train_one_epoch(epoch)
  avg_score = evaluate_model()

  # Save best model
  if avg_score > best_score:
    best_score = avg_score
    torch.save(model.state_dict(), checkpoint)

print(f"\nBest average score: {best_score:.2%}")

if best_score > 0.6:
  save_predictions(n=2)

[Epoch 0] Loss: 1.0101
[Epoch 1] Loss: 0.6442
[Epoch 2] Loss: 0.5428
[Epoch 3] Loss: 0.4343
[Epoch 4] Loss: 0.2934
[Epoch 5] Loss: 0.4297
[Epoch 6] Loss: 0.4842
[Epoch 7] Loss: 0.2937
[Epoch 8] Loss: 0.3674
[Epoch 9] Loss: 0.2955
[Epoch 10] Loss: 0.2951
[Epoch 11] Loss: 0.2535
[Epoch 12] Loss: 0.4072
[Epoch 13] Loss: 0.3749
[Epoch 14] Loss: 0.1482
[Epoch 15] Loss: 0.2748
[Epoch 16] Loss: 0.2974
[Epoch 17] Loss: 0.3093
[Epoch 18] Loss: 0.3428
[Epoch 19] Loss: 0.2721
[Epoch 20] Loss: 0.2154
[Epoch 21] Loss: 0.2870
[Epoch 22] Loss: 0.3057
[Epoch 23] Loss: 0.1749
[Epoch 24] Loss: 0.2286
[Epoch 25] Loss: 0.2805
[Epoch 26] Loss: 0.2907
[Epoch 27] Loss: 0.2452
[Epoch 28] Loss: 0.1114
[Epoch 29] Loss: 0.1705
[Epoch 30] Loss: 0.2452
[Epoch 31] Loss: 0.0808
[Epoch 32] Loss: 0.2770
[Epoch 33] Loss: 0.1695
[Epoch 34] Loss: 0.2148
[Epoch 35] Loss: 0.2036
[Epoch 36] Loss: 0.2225
[Epoch 37] Loss: 0.1395
[Epoch 38] Loss: 0.1837
[Epoch 39] Loss: 0.1699
[Epoch 40] Loss: 0.2084
[Epoch 41] Loss: 0.0923
[E