In [12]:
# basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd

# for ignoring warnings
import warnings
warnings.filterwarnings('ignore')

# We will be reading images using OpenCV
import cv2

# matplotlib for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# torchvision libraries
import torch
import torchvision
from torchvision import transforms as torchtrans
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# helper libraries
from engine import train_one_epoch, evaluate
import utils
import transforms as T

from tqdm.auto import tqdm



In [13]:
import torchvision
import torchvision.models.detection as detection
import torchvision.transforms as transforms
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torch.utils.data import DataLoader
import torch.optim as optim
import torch

# Send train=True for training transforms and False for val/test transforms
def get_transform(train):
    transform = [transforms.ToTensor()]
    return transforms.Compose(transform)


In [14]:
class Kitti(torchvision.datasets.Kitti):
    def __getitem__(self, index):
        image, target = super().__getitem__(index)
        # Convert target format from list of dicts to the correct dict format
        labels = [['Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc', 'DontCare'].index(t['type']) for t in target]
        boxes = [t['bbox'] for t in target]
        
        target = {'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4), 'labels': torch.as_tensor(labels)}
        return image, target

dataset = Kitti(root='../data', transform=get_transform(train=True))
dataset_test = Kitti(root='../data', transform=get_transform(train=False))


# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()

# train test split
test_split = 0.2
tsize = int(len(dataset)*test_split)
dataset = torch.utils.data.Subset(dataset, indices[:-tsize])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-tsize:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
  dataset,
  batch_size=4,
  shuffle=True,
  num_workers=0,
  collate_fn=utils.collate_fn,
)

data_loader_test = torch.utils.data.DataLoader(
  dataset_test,
  batch_size=4,
  shuffle=False,
  num_workers=0,
  collate_fn=utils.collate_fn,
)

In [15]:
def get_object_detection_model(num_classes):
  # load a model pre-trained pre-trained on COCO
  model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
  # get number of input features for the classifier
  in_features = model.roi_heads.box_predictor.cls_score.in_features
  # replace the pre-trained head with a new one
  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
  return model

In [16]:
import pprint as pp

image, target = dataset[0]
print(image)
print(type(target))
pp.pprint(target)


print("data loader part")
for images, targets in data_loader:
    print(len(images))
    print(images[0].shape)
    print(type(targets))
    pp.pprint(targets)
    break


tensor([[[0.2196, 0.2196, 0.2039,  ..., 0.0431, 0.0431, 0.0471],
         [0.2196, 0.2196, 0.2157,  ..., 0.0471, 0.0471, 0.0431],
         [0.2196, 0.2235, 0.2157,  ..., 0.0510, 0.0549, 0.0549],
         ...,
         [0.2902, 0.2588, 0.2549,  ..., 0.1176, 0.1255, 0.1294],
         [0.2353, 0.2706, 0.2784,  ..., 0.1059, 0.1176, 0.1216],
         [0.2392, 0.2980, 0.3373,  ..., 0.1059, 0.1176, 0.1216]],

        [[0.3333, 0.3255, 0.3176,  ..., 0.0588, 0.0510, 0.0471],
         [0.3294, 0.3216, 0.3137,  ..., 0.0745, 0.0588, 0.0510],
         [0.3176, 0.3216, 0.3176,  ..., 0.0745, 0.0667, 0.0588],
         ...,
         [0.4157, 0.2824, 0.2510,  ..., 0.1647, 0.1725, 0.1647],
         [0.3529, 0.2157, 0.2549,  ..., 0.1373, 0.1451, 0.1569],
         [0.1686, 0.2392, 0.3176,  ..., 0.1176, 0.1294, 0.1451]],

        [[0.4157, 0.4314, 0.4353,  ..., 0.0745, 0.0549, 0.0471],
         [0.4118, 0.4314, 0.4431,  ..., 0.0667, 0.0588, 0.0588],
         [0.4000, 0.4275, 0.4549,  ..., 0.0627, 0.0627, 0.

In [17]:
# train on gpu if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Define the list of classes
class_list = ['Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc', 'DontCare']

num_classes = len(class_list) # one class (class 0) is dedicated to the "background"

# get the model using our helper function
model = get_object_detection_model(num_classes)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(
  optimizer,
  step_size=3,
  gamma=0.1
)

In [18]:
# Function to evaluate the model
def evaluate_model(model, data_loader, device):
    model.eval()  # Set the model to evaluation mode
    eval_results = []
    with torch.no_grad():
        for images, targets in tqdm(data_loader):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            outputs = model(images)
            eval_results.append((outputs, targets))
    return eval_results

# Load the baseline model (pre-trained, unmodified)
baseline_model = get_object_detection_model(num_classes)
baseline_model.to(device)

# Evaluate the baseline model
baseline_eval_results = evaluate_model(baseline_model, data_loader_test, device)


100%|██████████| 374/374 [17:55<00:00,  2.88s/it]


In [None]:
# # training for 5 epochs
# num_epochs = 5

# model_save_path = './models/faster_rcnn_kitti.pth'

# # Training loop
# num_epochs = 5
# for epoch in range(num_epochs):
#     train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
#     lr_scheduler.step()

# # Save the model's state dictionary
# torch.save(model.state_dict(), model_save_path)

In [21]:
import wandb

# Login to WandB (only needed if you haven't configured automatic login)
wandb.login()

num_epochs = 5

# Initialize a new WandB run
wandb.init(project="portalcut",
            entity='231n-augmentation', 
            notes="2024-05-28-kitti-test1-fasterrcnn_resnet50_fpn",
            
            config={
                "learning_rate": 0.005,
                "epochs": num_epochs,
                "batch_size": 4,
                "optimizer": "SGD",
                "momentum": 0.9,
                "weight_decay": 0.0005,
                "lr_scheduler": "StepLR",
                "step_size": 3,
                "gamma": 0.1
            })
config = wandb.config


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msazzadi14[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [23]:
# training for 5 epochs
import math
import sys
import time
import utils

scaler = None  # Define the "scaler" variable

model_save_path = './models/fasterrcnn_resnet50_fpn_kitti.pth'
print_freq = 10
# Training loop

# Assume we have an existing setup
for epoch in range(num_epochs):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = f"Epoch: [{epoch}]"
    start_time = time.time()

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=warmup_factor, total_iters=warmup_iters
        )

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        # Log metrics to WandB
        wandb.log({
            "epoch": epoch,
            "loss": loss_value,
            "learning_rate": optimizer.param_groups[0]["lr"]
        })
        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    # After each epoch
    lr_scheduler.step()



# wandb.log_artifact(model)
torch.save(model.state_dict(), model_save_path)

# model.to_onnx()
# wandb.save("model.onnx")


# Finish WandB run
wandb.finish()


Epoch: [0]  [   0/1497]  eta: 3:39:52  lr: 0.000000  loss: 0.8641 (0.8641)  loss_classifier: 0.4238 (0.4238)  loss_box_reg: 0.2727 (0.2727)  loss_objectness: 0.1330 (0.1330)  loss_rpn_box_reg: 0.0345 (0.0345)  time: 8.8123  data: 0.0892
Epoch: [0]  [  10/1497]  eta: 3:27:59  lr: 0.000002  loss: 0.8237 (0.8513)  loss_classifier: 0.4143 (0.4299)  loss_box_reg: 0.2335 (0.2567)  loss_objectness: 0.0526 (0.0823)  loss_rpn_box_reg: 0.0758 (0.0824)  time: 8.3922  data: 0.0825
Epoch: [0]  [  20/1497]  eta: 3:23:06  lr: 0.000003  loss: 0.8209 (0.8944)  loss_classifier: 0.3918 (0.4202)  loss_box_reg: 0.2260 (0.2471)  loss_objectness: 0.0597 (0.1279)  loss_rpn_box_reg: 0.0758 (0.0992)  time: 8.2225  data: 0.0815
Epoch: [0]  [  30/1497]  eta: 3:18:37  lr: 0.000004  loss: 0.7025 (0.8502)  loss_classifier: 0.3786 (0.4088)  loss_box_reg: 0.2131 (0.2412)  loss_objectness: 0.0680 (0.1121)  loss_rpn_box_reg: 0.0603 (0.0881)  time: 7.9764  data: 0.0839
Epoch: [0]  [  40/1497]  eta: 3:14:41  lr: 0.000005 

ValueError: Path is not a file: ./models/fasterrcnn_resnet50_fpn_kitti.pth