In [1]:
!pip install albumentations==0.4.6


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd

# for ignoring warnings
import warnings
warnings.filterwarnings('ignore')

# We will be reading images using OpenCV
import cv2

# matplotlib for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# torchvision libraries
import torch
import torchvision
from torchvision import transforms as torchtrans
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# helper libraries
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# for image augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2


In [3]:
import torchvision
import torchvision.models.detection as detection
import torchvision.transforms as transforms
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torch.utils.data import DataLoader
import torch.optim as optim
import torch

# Send train=True for training transforms and False for val/test transforms
def get_transform(train):
    transform = [transforms.ToTensor()]
    return transforms.Compose(transform)


In [4]:
class Kitti(torchvision.datasets.Kitti):
    def __getitem__(self, index):
        image, target = super().__getitem__(index)
        # Convert target format from list of dicts to the correct dict format
        labels = [['Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc', 'DontCare'].index(t['type']) for t in target]
        boxes = [t['bbox'] for t in target]
        
        target = {'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4), 'labels': torch.as_tensor(labels)}
        return image, target

dataset = Kitti(root='../data', transform=get_transform(train=True))
dataset_test = Kitti(root='../data', transform=get_transform(train=False))


# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()

# train test split
test_split = 0.2
tsize = int(len(dataset)*test_split)
dataset = torch.utils.data.Subset(dataset, indices[:-tsize])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-tsize:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
  dataset,
  batch_size=4,
  shuffle=True,
  num_workers=0,
  collate_fn=utils.collate_fn,
)

data_loader_test = torch.utils.data.DataLoader(
  dataset_test,
  batch_size=4,
  shuffle=False,
  num_workers=0,
  collate_fn=utils.collate_fn,
)

In [5]:
def get_object_detection_model(num_classes):
  # load a model pre-trained pre-trained on COCO
  model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
  # get number of input features for the classifier
  in_features = model.roi_heads.box_predictor.cls_score.in_features
  # replace the pre-trained head with a new one
  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
  return model

In [6]:
import pprint as pp

image, target = dataset[0]
print(image)
print(type(target))
pp.pprint(target)


print("data loader part")
for images, targets in data_loader:
    print(len(images))
    print(images[0].shape)
    print(type(targets))
    pp.pprint(targets)
    break


tensor([[[0.2196, 0.2196, 0.2039,  ..., 0.0431, 0.0431, 0.0471],
         [0.2196, 0.2196, 0.2157,  ..., 0.0471, 0.0471, 0.0431],
         [0.2196, 0.2235, 0.2157,  ..., 0.0510, 0.0549, 0.0549],
         ...,
         [0.2902, 0.2588, 0.2549,  ..., 0.1176, 0.1255, 0.1294],
         [0.2353, 0.2706, 0.2784,  ..., 0.1059, 0.1176, 0.1216],
         [0.2392, 0.2980, 0.3373,  ..., 0.1059, 0.1176, 0.1216]],

        [[0.3333, 0.3255, 0.3176,  ..., 0.0588, 0.0510, 0.0471],
         [0.3294, 0.3216, 0.3137,  ..., 0.0745, 0.0588, 0.0510],
         [0.3176, 0.3216, 0.3176,  ..., 0.0745, 0.0667, 0.0588],
         ...,
         [0.4157, 0.2824, 0.2510,  ..., 0.1647, 0.1725, 0.1647],
         [0.3529, 0.2157, 0.2549,  ..., 0.1373, 0.1451, 0.1569],
         [0.1686, 0.2392, 0.3176,  ..., 0.1176, 0.1294, 0.1451]],

        [[0.4157, 0.4314, 0.4353,  ..., 0.0745, 0.0549, 0.0471],
         [0.4118, 0.4314, 0.4431,  ..., 0.0667, 0.0588, 0.0588],
         [0.4000, 0.4275, 0.4549,  ..., 0.0627, 0.0627, 0.

In [7]:
# train on gpu if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Define the list of classes
class_list = ['Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc', 'DontCare']

num_classes = len(class_list) # one class (class 0) is dedicated to the "background"

# get the model using our helper function
model = get_object_detection_model(num_classes)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(
  optimizer,
  step_size=3,
  gamma=0.1
)

In [8]:
# training for 5 epochs
num_epochs = 5

for epoch in range(num_epochs):
    # training for one epoch
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)
    
    


Epoch: [0]  [   0/1497]  eta: 3:23:07  lr: 0.000010  loss: 2.2444 (2.2444)  loss_classifier: 1.9724 (1.9724)  loss_box_reg: 0.0695 (0.0695)  loss_objectness: 0.0798 (0.0798)  loss_rpn_box_reg: 0.1226 (0.1226)  time: 8.1416  data: 0.0955
Epoch: [0]  [  10/1497]  eta: 3:36:07  lr: 0.000060  loss: 2.2444 (2.2922)  loss_classifier: 1.9368 (1.9154)  loss_box_reg: 0.1781 (0.2183)  loss_objectness: 0.0725 (0.0861)  loss_rpn_box_reg: 0.0562 (0.0724)  time: 8.7207  data: 0.1029
Epoch: [0]  [  20/1497]  eta: 3:27:12  lr: 0.000110  loss: 1.9197 (1.9275)  loss_classifier: 1.5829 (1.5444)  loss_box_reg: 0.1935 (0.2131)  loss_objectness: 0.0609 (0.0887)  loss_rpn_box_reg: 0.0583 (0.0813)  time: 8.4315  data: 0.0995
Epoch: [0]  [  30/1497]  eta: 3:22:51  lr: 0.000160  loss: 1.0179 (1.5626)  loss_classifier: 0.5967 (1.1752)  loss_box_reg: 0.2019 (0.2138)  loss_objectness: 0.0845 (0.0958)  loss_rpn_box_reg: 0.0728 (0.0779)  time: 8.0635  data: 0.0976
Epoch: [0]  [  40/1497]  eta: 3:18:49  lr: 0.000210 

In [None]:
for epoch in range(num_epochs):
    for images, targets in data_loader:
        labels = [t["labels"] for t in targets]
        if any(l.item() > 8 for l in torch.cat(labels)):
            print(f"Invalid label detected: {labels}")
        break  # Remove this after testing to run full training
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    lr_scheduler.step()
