In [1]:
# basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd

# for ignoring warnings
# NOTE(review): called without a category, this filter suppresses every warning for the rest
# of the session, which also hides deprecation warnings raised by the libraries used below.
import warnings
warnings.filterwarnings('ignore')

# We will be reading images using OpenCV
import cv2

# matplotlib for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# torchvision libraries
import torch
import torchvision
from torchvision import transforms as torchtrans
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# helper libraries
from engine import train_one_epoch, evaluate
import utils
import transforms as T

from tqdm.auto import tqdm



In [2]:
import torchvision
import torchvision.models.detection as detection
import torchvision.transforms as transforms
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torch.utils.data import DataLoader
import torch.optim as optim
import torch

def get_transform():
    """Return the preprocessing pipeline handed to the dataset.

    The function takes no arguments and composes a single ``ToTensor`` step,
    so the same transform is produced no matter which split it is used for.
    """
    return transforms.Compose([transforms.ToTensor()])


In [3]:
from datasets import KittiTorch

# Build the KITTI dataset rooted at ../data; every sample goes through get_transform().
dataset = KittiTorch(root='../data', download=True, transform=get_transform())
print("Initial dataset size:", len(dataset))

# Fix the seed before drawing the permutation so the split below is repeatable.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()

# Split fractions. The test fraction is informational only: the test subset
# simply receives whatever is left after the train and validation slices.
train_split = 0.8
val_split = 0.1
test_split = 0.1

n_total = len(dataset)
train_size = int(n_total * train_split)
val_size = int(n_total * val_split)
test_size = n_total - train_size - val_size

# Consecutive, non-overlapping slices of the shuffled index list.
val_end = train_size + val_size
train_indices = indices[:train_size]
val_indices = indices[train_size:val_end]
test_indices = indices[val_end:]

dataset_train, dataset_val, dataset_test = (
    torch.utils.data.Subset(dataset, subset_indices)
    for subset_indices in (train_indices, val_indices, test_indices)
)

batch_size = 4

# Options shared by all three loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=batch_size, num_workers=0, collate_fn=utils.collate_fn)
data_loader_train = torch.utils.data.DataLoader(dataset_train, shuffle=True, **loader_kwargs)
data_loader_val = torch.utils.data.DataLoader(dataset_val, shuffle=False, **loader_kwargs)
data_loader_test = torch.utils.data.DataLoader(dataset_test, shuffle=False, **loader_kwargs)

# Confirm the subset sizes.
print("Training set size:", len(dataset_train))
print("Validation set size:", len(dataset_val))
print("Testing set size:", len(dataset_test))


Initial dataset size: 7481
Training set size: 5984
Validation set size: 748
Testing set size: 749


In [4]:
# Drop cached allocator memory, then describe every CUDA device PyTorch can see.
torch.cuda.empty_cache()
if not torch.cuda.is_available():
    print("No GPU is available.")
else:
    gpu_count = torch.cuda.device_count()
    print(f"Number of GPUs available: {gpu_count}")

    bytes_per_gb = 1024**3
    for i in range(gpu_count):
        # Look the properties up once; two of the lines below read from them.
        props = torch.cuda.get_device_properties(i)
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        print(f"  - {torch.cuda.memory_allocated(i) / bytes_per_gb:.2f} GB")
        print(f"  - Memory Cached: {torch.cuda.memory_reserved(i) / bytes_per_gb:.2f} GB")
        print(f"  - Memory Total: {props.total_memory / bytes_per_gb:.2f} GB")
        print(f"  - Compute Capability: {torch.cuda.get_device_capability(i)}")
        print(f"  - Multiprocessors: {props.multi_processor_count}")

Number of GPUs available: 1
GPU 0: NVIDIA GeForce RTX 3070
  - 0.00 GB
  - Memory Cached: 0.00 GB
  - Memory Total: 7.78 GB
  - Compute Capability: (8, 6)
  - Multiprocessors: 46


In [5]:
def get_object_detection_model(num_classes, freeze_backbone=False):
    """Build a Faster R-CNN (ResNet-50 FPN v2) detector with a new box-prediction head.

    Args:
        num_classes: Number of outputs of the replacement head. The caller in this
            notebook counts the background class as one of them.
        freeze_backbone: When True, gradients are disabled for every parameter of
            ``model.backbone``. Defaults to False, which leaves all parameters trainable.

    Returns:
        The torchvision detection model with ``roi_heads.box_predictor`` replaced by a
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # `weights="DEFAULT"` is the supported spelling of the deprecated `pretrained=True`
    # flag and loads the default pretrained weights for this architecture.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights="DEFAULT")

    # Size the new head from the feature width the existing classifier consumes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    if freeze_backbone:
        for param in model.backbone.parameters():
            param.requires_grad = False

    return model

In [6]:
# Build the detector once with 10 output classes; a later cell rebuilds `model`
# from the class list before training.
model = get_object_detection_model(num_classes=10)

In [7]:
import pprint as pp

# Look at one raw sample taken directly from the dataset.
sample = dataset[0]
image, target = sample
print(image)
print(type(target))
pp.pprint(target)


print("data loader part")
# Inspect only the first batch produced by the training loader, then stop.
for images, targets in data_loader_train:
    for summary in (len(images), images[0].shape, type(targets)):
        print(summary)
    pp.pprint(targets)
    break


tensor([[[0.0667, 0.0627, 0.0980,  ..., 0.1451, 0.1373, 0.1412],
         [0.0667, 0.0549, 0.0941,  ..., 0.1451, 0.1373, 0.1373],
         [0.0667, 0.0706, 0.0706,  ..., 0.1333, 0.1255, 0.1176],
         ...,
         [0.0745, 0.0667, 0.0627,  ..., 0.0941, 0.0784, 0.0784],
         [0.0471, 0.0471, 0.0549,  ..., 0.0784, 0.0824, 0.0824],
         [0.0510, 0.0549, 0.0549,  ..., 0.0863, 0.0863, 0.0784]],

        [[0.0863, 0.0941, 0.1137,  ..., 0.1137, 0.0941, 0.0667],
         [0.0745, 0.0902, 0.1098,  ..., 0.1137, 0.0980, 0.0706],
         [0.0627, 0.0627, 0.0824,  ..., 0.1137, 0.0902, 0.0667],
         ...,
         [0.0902, 0.0863, 0.0863,  ..., 0.0980, 0.1020, 0.1059],
         [0.0824, 0.0745, 0.0706,  ..., 0.0980, 0.0980, 0.1059],
         [0.0784, 0.0706, 0.0667,  ..., 0.1176, 0.1137, 0.1176]],

        [[0.0706, 0.1098, 0.1294,  ..., 0.0902, 0.0824, 0.0588],
         [0.0627, 0.0784, 0.0941,  ..., 0.0980, 0.0824, 0.0549],
         [0.0627, 0.0745, 0.0824,  ..., 0.1059, 0.0863, 0.

In [8]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Learning rate (the same value as the literal 10e-3).
lr = 0.01

# Class names; index 0 is reserved for the background class.
class_list = [
    'Background',
    'Car', 'Van', 'Truck',
    'Pedestrian', 'Person_sitting', 'Cyclist',
    'Tram', 'Misc', 'DontCare',
]
num_classes = len(class_list)

# Build the detector for these classes and place it on the selected device.
model = get_object_detection_model(num_classes)
model.to(device)

# Adam over the trainable parameters only.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = optim.Adam(params, lr=lr)

# Other Adam options can be passed the same way, e.g.
# optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.0005)

cuda


In [9]:
# Release cached GPU memory that PyTorch's allocator is holding but not currently using.
torch.cuda.empty_cache()

In [10]:
import wandb

# Authenticate with WandB (only needed when automatic login is not configured).
wandb.login()

num_epochs = 5

# Hyperparameters recorded alongside the run.
run_config = {
    "learning_rate": lr,
    "epochs": num_epochs,
    "batch_size": batch_size,
    "optimizer": "Adam",
}

# Start a new run in the shared project.
wandb.init(
    project="portalcut",
    entity='231n-augmentation',
    notes="2024-05-30-kitti-test1-fasterrcnn_resnet50_fpn_v2_scratch_50ep_v2",
    config=run_config,
)

config = wandb.config


In [11]:
# Train for `num_epochs` epochs, log every step to WandB, then save the weights.
import math
import os
import sys
import time
import utils

data_loader = data_loader_train

scaler = None  # no gradient scaler is used, so the autocast context below stays disabled

model_save_path = './models/2024-05-30-kitti-test1-fasterrcnn_resnet50_fpn_v2_scratch_50epv2.pth'
# Create the checkpoint folder before training starts: torch.save does not create
# missing directories, and failing after the last epoch would lose the trained weights.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
print_freq = 10

for epoch in range(num_epochs):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = f"Epoch: [{epoch}]"

    # Linear learning-rate warm-up during the first epoch only.
    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=warmup_factor, total_iters=warmup_iters
        )

    # log_every wraps the loader; presumably it reports the meters every `print_freq`
    # batches, so no extra per-batch printing is done here.
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        # Move tensor fields of each target to the device; leave other values as they are.
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

        with torch.cuda.amp.autocast(enabled=scaler is not None):
            # In training mode the model returns a dict of loss terms.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        # Abort on NaN/inf loss instead of continuing to update the weights with it.
        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # The warm-up scheduler advances once per batch, not once per epoch.
        if lr_scheduler is not None:
            lr_scheduler.step()

        # Log metrics to WandB
        wandb.log({
            **loss_dict_reduced,
            "epoch": epoch,
            "loss": loss_value,
            "learning_rate": optimizer.param_groups[0]["lr"]
        })
        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

# Persist only the weights (state dict), not the whole model object.
torch.save(model.state_dict(), model_save_path)

# Finish WandB run
wandb.finish()

In [None]:
# Report every visible CUDA device and its current memory usage.
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    print(f"Number of GPUs available: {gpu_count}")

    for i in range(gpu_count):
        # Look the properties up once; two of the lines below read from them.
        props = torch.cuda.get_device_properties(i)
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        # This value used to be printed without a label, unlike the lines around it.
        print(f"  - Memory Allocated: {torch.cuda.memory_allocated(i) / 1024**3:.2f} GB")
        print(f"  - Memory Cached: {torch.cuda.memory_reserved(i) / 1024**3:.2f} GB")
        print(f"  - Memory Total: {props.total_memory / 1024**3:.2f} GB")
        print(f"  - Compute Capability: {torch.cuda.get_device_capability(i)}")
        print(f"  - Multiprocessors: {props.multi_processor_count}")
else:
    print("No GPU is available.")

In [None]:
# Display the model object (its repr is rendered as the cell output).
model

In [None]:
# Display the first sample of the test subset.
dataset_test[0]

In [None]:
# Load the trained weights saved on disk and test the model on one image from the test subset
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import cv2
import numpy as np

# Image tensor of sample 5 from the test subset.
image = dataset_test[5][0]

# Load the saved weights. map_location remaps the stored tensors onto the device in use,
# so a checkpoint written from a GPU session also loads on a CPU-only machine.
model.load_state_dict(torch.load(model_save_path, map_location=device))

# Reuse the `device` selected earlier in the notebook instead of a hard-coded 'cuda',
# so this cell follows the same CPU fallback as the rest of the notebook.
model = model.to(device)

# Add a leading batch dimension and move the input next to the model.
input_tensor = image.unsqueeze(0).to(device)

model.eval()

# Disable gradient computation during inference
with torch.no_grad():
    predictions = model(input_tensor)


import cv2
import matplotlib.pyplot as plt
import numpy as np

def visualize_image_with_boxes(image, boxes, labels, label_names):
    """Draw labelled bounding boxes on an image and show the result with matplotlib.

    Args:
        image: Image tensor in channel-first layout; it is moved to the CPU and
            transposed to height x width x channel before drawing.
        boxes: Iterable of box tensors; the first four values of each are used as the
            (x1, y1, x2, y2) corners after truncation to integers.
        labels: Iterable of scalar tensors, each an index into ``label_names``.
        label_names: Sequence mapping a label index to its display name.

    Returns:
        None. The annotated image is displayed in a new 12x8 figure.
    """
    # Channel-first tensor -> channel-last numpy array for OpenCV / matplotlib.
    image = image.cpu().numpy().transpose((1, 2, 0))
    # Min-max stretch the pixel values to [0, 255] and convert to uint8 for drawing.
    image = cv2.normalize(image, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F).astype(np.uint8)

    # Colour per class name. The key must match the entries of `label_names` exactly:
    # 'DontCare' was previously spelled "Don'tCare" and therefore never matched.
    colors = {
        'Car': (255, 0, 0), 'Van': (0, 255, 0), 'Truck': (0, 0, 255),
        'Pedestrian': (255, 255, 0), 'Person_sitting': (255, 0, 255), 'Cyclist': (0, 255, 255),
        'Tram': (127, 127, 255), 'Misc': (255, 127, 127), 'DontCare': (127, 127, 127)
    }

    for box, label in zip(boxes, labels):
        # Plain Python ints for the OpenCV drawing calls.
        x1, y1, x2, y2 = box.cpu().numpy().astype(int)[:4].tolist()
        label_text = label_names[int(label.cpu())]
        # Names without an entry above are drawn in white.
        color = colors.get(label_text, (255, 255, 255))

        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)

        # Keep the caption inside the image when the box touches the top edge.
        cv2.putText(image, label_text, (x1, max(y1 - 10, 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Display the image
    plt.figure(figsize=(12, 8))
    plt.imshow(image)
    plt.axis('off')
    plt.show()

# Display names indexed by label id (index 0 is the background entry).
label_names = [
    'Background',
    'Car', 'Van', 'Truck',
    'Pedestrian', 'Person_sitting', 'Cyclist',
    'Tram', 'Misc', 'DontCare',
]

# The model was given a single image, so take the first (only) prediction entry.
target = predictions[0]

# Plot the image as-is (channels moved last), then show it again with the
# predicted boxes and labels drawn on top.
plt.imshow(image.permute(1, 2, 0))
visualize_image_with_boxes(
    image,
    boxes=target['boxes'],
    labels=target['labels'],
    label_names=label_names,
)


In [None]:
import torch
import torch.nn as nn
import numpy as np
from torchvision.ops import box_iou
from collections import defaultdict

# Reload the saved weights onto the device selected earlier in the notebook.
# map_location lets a checkpoint written from a GPU session load on a CPU-only machine,
# and using `device` instead of a hard-coded 'cuda' keeps the notebook's CPU fallback working.
model.load_state_dict(torch.load(model_save_path, map_location=device))
model = model.to(device)
model.eval()

# Define label names based on your dataset specifics
label_names = ['Background', 'Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc', 'DontCare']

def compute_iou(boxes1, boxes2):
    """Pairwise IoU between two sets of boxes.

    Args:
        boxes1: Box tensor passed as the first argument to ``box_iou``.
        boxes2: Box tensor passed as the second argument; it is moved to the device
            of ``boxes1`` first.

    Returns:
        The matrix returned by ``box_iou`` (one row per box in ``boxes1``, one column
        per box in ``boxes2``), or an empty 1-D tensor on the device of ``boxes1``
        when either input has no elements.
    """
    if boxes1.numel() == 0 or boxes2.numel() == 0:
        return torch.tensor([], device=boxes1.device)
    # Work on the device boxes1 already lives on rather than forcing 'cuda',
    # so the function also runs on machines without a GPU.
    return box_iou(boxes1, boxes2.to(boxes1.device))

def _average_precision(recalls, precisions):
    """Area under a precision-recall curve using all-point interpolation (PASCAL VOC style)."""
    mrec = np.concatenate(([0.0], recalls, [1.0]))
    mpre = np.concatenate(([0.0], precisions, [0.0]))
    # Precision envelope: at every recall level use the best precision reached at
    # that recall or any higher one.
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]
    # Sum rectangle areas wherever recall actually increases.
    steps = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1]))


def evaluate_model(model, dataset, label_names, iou_threshold=0.5, device=None):
    """Run the detector over a dataset and print Mean IoU, per-class AP and mAP.

    Args:
        model: Detection model called as ``model(batch)[0]``; the result must provide
            'boxes', 'labels' and 'scores'. It is expected to already be in eval mode
            (the calling cells do this).
        dataset: Indexable collection of ``(image, target)`` pairs where ``target``
            provides 'boxes' and 'labels' tensors.
        label_names: Class names indexed by label id; the entry named "Background"
            is skipped.
        iou_threshold: A detection counts as a true positive when its best IoU with a
            ground-truth box of the same class is strictly greater than this value.
        device: Device used for inference. Defaults to the device of the model's first
            parameter, or the CPU when the model has none.

    Returns:
        None. All results are printed.

    Notes:
        - "Mean IoU" is the per-image mean over ALL ground-truth/prediction pairs
          (class-agnostic, unmatched pairs included), averaged over the images that
          have both; it is ``nan`` when there is no such image.
        - AP follows the usual detection protocol: detections are ranked by score, each
          ground-truth box can be claimed by at most one detection, and every unmatched
          detection is a false positive, including those in images with no ground truth
          of the class.
        - Classes without any ground-truth box are reported as skipped and left out of
          the mAP average.
    """
    if device is None:
        parameters = getattr(model, 'parameters', None)
        first_param = next(iter(parameters()), None) if callable(parameters) else None
        device = first_param.device if first_param is not None else torch.device('cpu')

    all_true_boxes, all_true_labels = [], []
    all_pred_boxes, all_pred_labels, all_pred_scores = [], [], []

    for idx in range(len(dataset)):
        image, target = dataset[idx]

        with torch.no_grad():
            # The model receives a batch of one image; keep its single result.
            predictions = model(image.unsqueeze(0).to(device))[0]

        all_true_boxes.append(target['boxes'].to(device))
        all_true_labels.append(target['labels'].to(device))
        all_pred_boxes.append(predictions['boxes'].to(device))
        all_pred_labels.append(predictions['labels'].to(device))
        all_pred_scores.append(predictions['scores'].to(device))

    # ---- Mean IoU (see Notes for what this number is) ----
    per_image_iou = []
    for true_boxes, pred_boxes in zip(all_true_boxes, all_pred_boxes):
        iou = compute_iou(true_boxes, pred_boxes)
        if iou.numel() > 0:
            per_image_iou.append(torch.mean(iou))

    # torch.stack rejects an empty list, so handle "no comparable image" explicitly.
    mean_iou = torch.mean(torch.stack(per_image_iou)).item() if per_image_iou else float('nan')
    print(f"Mean IoU: {mean_iou:.4f}")

    # ---- Per-class AP and mAP ----
    aps = []
    for class_idx, label_name in enumerate(label_names):
        if label_name == "Background":
            continue

        scores = []  # confidence of every detection of this class, across all images
        hits = []    # parallel list: True when that detection is a true positive
        num_gt = 0

        for true_boxes, true_labels, pred_boxes, pred_labels, pred_scores in zip(
            all_true_boxes, all_true_labels, all_pred_boxes, all_pred_labels, all_pred_scores
        ):
            gt_boxes = true_boxes[true_labels == class_idx]
            class_mask = pred_labels == class_idx
            class_boxes = pred_boxes[class_mask]
            class_scores = pred_scores[class_mask]
            num_gt += len(gt_boxes)

            if len(class_boxes) == 0:
                continue

            # Within an image, let the most confident detection claim a box first.
            order = torch.argsort(class_scores, descending=True)
            class_boxes = class_boxes[order]
            scores.extend(class_scores[order].cpu().tolist())

            if len(gt_boxes) == 0:
                # Nothing to match against: every detection here is a false positive.
                hits.extend([False] * len(class_boxes))
                continue

            ious = compute_iou(gt_boxes, class_boxes).cpu()
            best_iou, best_gt = ious.max(dim=0)
            claimed = set()
            for iou_value, gt_index in zip(best_iou.tolist(), best_gt.tolist()):
                is_hit = iou_value > iou_threshold and gt_index not in claimed
                if is_hit:
                    claimed.add(gt_index)
                hits.append(is_hit)

        if num_gt == 0:
            print(f"AP for {label_name}: skipped (no ground-truth boxes)")
            continue

        # Rank all detections of the class by confidence before accumulating.
        ranking = np.argsort(-np.asarray(scores, dtype=float), kind='stable')
        true_positive = np.asarray(hits, dtype=bool)[ranking]
        tp_cumsum = np.cumsum(true_positive)
        fp_cumsum = np.cumsum(~true_positive)
        recalls = tp_cumsum / num_gt
        precisions = tp_cumsum / np.maximum(tp_cumsum + fp_cumsum, 1)

        ap = _average_precision(recalls, precisions)
        aps.append(ap)

        print(f"AP for {label_name}: {ap:.4f}")

    mAP = float(np.mean(aps)) if aps else float('nan')
    print(f"Mean Average Precision (mAP): {mAP:.4f}")

# Score the reloaded model on the test subset.
evaluate_model(model=model, dataset=dataset_test, label_names=label_names)


In [None]:
# NOTE(review): `dataset` is the full, unsplit dataset, so this run also scores the samples
# that dataset_train was drawn from; the printed numbers are not a held-out estimate.
evaluate_model(model, dataset, label_names)