<a href="https://colab.research.google.com/github/shubhamt2897/DL_BB/blob/main/DL_Bounding_Box.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installing Dependencies

In [None]:
# Environment setup: upgrade torch/torchvision, then install the remaining packages.
# NOTE(review): no versions are pinned, so a later re-run may resolve to different releases.
!pip install --upgrade torch torchvision
!pip install numpy matplotlib albumentations kornia open3d gdown



Download & Extract Your Dataset

In [None]:
# Download from Google Drive using gdown
# (-O names the local output file dl_challenge.tar.xz).
!gdown "https://drive.google.com/uc?id=11s-GLb6LZ0SCAVW6aikqImuuQEEbT_Fb" -O dl_challenge.tar.xz

# Extract the .tar.xz file
# (-x extract, -v list each entry as it is written, -f read from the named archive).
!tar -xvf dl_challenge.tar.xz  # This should create a dl_challenge/ directory


In [None]:
import os
import glob

# Sanity-check the extracted dataset: count the entries directly under data_dir
# and peek inside the first one.
data_dir = 'dl_challenge'  # or the path where dl_challenge is located
folders = sorted(glob.glob(os.path.join(data_dir, '*')))
print("Number of data folders:", len(folders))
if folders:
    print("First folder name:", folders[0])
    print("Files in the first folder:", os.listdir(folders[0]))
else:
    # Indexing folders[0] on an empty list would raise an unhelpful IndexError;
    # point the reader at the likely cause instead.
    print(f"No entries found under '{data_dir}'. Check that the download and extraction cells ran successfully.")



**Dataset & Preprocessing**

In [None]:
import os

# Look inside one specific sample folder to see which files a sample consists of.
folder_path = 'dl_challenge/96e66c6d-9915-11ee-9103-bbb8eae05561'
sample_files = os.listdir(folder_path)
print("Files in the folder:", sample_files)


**Data Loading, Model Definition & Training Setup**

 Create a PyTorch Dataset Class

In [None]:
import torch
from torch.utils.data import Dataset
import albumentations as A
import cv2
import numpy as np
import os

class Sereact3DDataset(Dataset):
    """Dataset over per-sample folders holding an RGB image, 3D boxes, masks and a point cloud.

    Each usable folder must contain ``rgb.jpg``, ``bbox3d.npy``, ``mask.npy`` and
    ``pc.npy``; folders missing any of these files are dropped at construction time.

    Args:
        folder_list: Iterable of sample folder paths.
        transform: Optional callable invoked as ``transform(image=rgb)`` that returns a
            mapping with an ``'image'`` entry. It is applied to the RGB image only;
            the boxes, mask and point cloud are returned untouched.
    """

    # Files that must all be present for a folder to count as a sample.
    REQUIRED_FILES = ('rgb.jpg', 'bbox3d.npy', 'mask.npy', 'pc.npy')

    def __init__(self, folder_list, transform=None):
        self.transform = transform

        # Keep only folders that contain every required file.
        self.folder_list = [
            folder for folder in folder_list
            if all(os.path.isfile(os.path.join(folder, fname))
                   for fname in self.REQUIRED_FILES)
        ]

    def __len__(self):
        """Return the number of folders that passed the required-files filter."""
        return len(self.folder_list)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as a dict of float tensors.

        Returns:
            dict with keys ``'rgb'`` (image permuted to channels-first), ``'bbox3d'``,
            ``'mask'`` and ``'point_cloud'``. A point cloud stored as ``(3, H, W)`` is
            flattened to ``(H*W, 3)``; any other layout is returned as loaded.

        Raises:
            OSError: if ``cv2.imread`` cannot read/decode the image file.
        """
        folder_path = self.folder_list[idx]
        rgb_path   = os.path.join(folder_path, 'rgb.jpg')
        bbox_path  = os.path.join(folder_path, 'bbox3d.npy')
        mask_path  = os.path.join(folder_path, 'mask.npy')
        pc_path    = os.path.join(folder_path, 'pc.npy')

        # cv2.imread signals failure by returning None rather than raising.
        bgr = cv2.imread(rgb_path)
        if bgr is None:
            raise OSError(f"cv2.imread could not read image: {rgb_path}")
        # BGR -> RGB. The reversed slice is a negative-stride view, which
        # torch.from_numpy rejects, so materialise it as a contiguous array.
        rgb = np.ascontiguousarray(bgr[:, :, ::-1])

        bbox3d = np.load(bbox_path)
        mask = np.load(mask_path)
        point_cloud = np.load(pc_path)

        # If the point cloud looks like an image (shape: [3, H, W]), convert it
        # to (H, W, 3) and flatten to (H*W, 3).
        if point_cloud.ndim == 3 and point_cloud.shape[0] == 3:
            point_cloud = np.transpose(point_cloud, (1, 2, 0)).reshape(-1, 3)

        # Apply transform to RGB image if provided.
        if self.transform:
            augmented = self.transform(image=rgb)
            # A transform may hand back a strided view; make it safe for from_numpy.
            rgb = np.ascontiguousarray(augmented['image'])

        # Convert to torch tensors
        return {
            'rgb': torch.from_numpy(rgb).permute(2, 0, 1).float(),  # (C, H, W)
            'bbox3d': torch.from_numpy(bbox3d).float(),
            'mask': torch.from_numpy(mask).float(),
            'point_cloud': torch.from_numpy(point_cloud).float(),
        }


**Define Transforms and Create DataLoaders**

In [None]:
import glob
from torch.utils.data import DataLoader
import albumentations as A

# Define transforms for training and validation.
# The dataset applies these to the RGB image only, so the training pipeline is
# restricted to ops that leave scene geometry intact. A horizontal flip would
# mirror the image while the 3D box, mask and point cloud stay unflipped, giving
# the model contradictory inputs/targets on the flipped samples.
train_transform = A.Compose([
    A.Resize(224, 224),
    A.RandomBrightnessContrast(p=0.2),
])
val_transform = A.Compose([A.Resize(224, 224)])

# Get list of folders from 'dl_challenge'
data_dir = 'dl_challenge'
folders = sorted(glob.glob(os.path.join(data_dir, '*')))

# Example splits (adjust counts as needed): slicing never raises, so a smaller
# dataset simply yields shorter (possibly empty) validation/test splits.
train_folders = folders[:150]
val_folders = folders[150:180]
test_folders = folders[180:]

# Create dataset instances
train_dataset = Sereact3DDataset(train_folders, transform=train_transform)
val_dataset   = Sereact3DDataset(val_folders, transform=val_transform)
test_dataset  = Sereact3DDataset(test_folders, transform=val_transform)

import torch

def convert_bbox_corners_to_params(corners):
    """
    Collapse a box given by its corners (tensor of shape [8, 3]) into 7 numbers.

    This is a simplified, axis-aligned encoding:
      - entries 0..2: centre, taken as the mean of the corners;
      - entries 3..5: size, taken as per-axis (max - min) over the corners;
      - entry 6: heading, always 0.
    Returns a tensor of shape [7].
    """
    lower = corners.min(dim=0).values   # [3]
    upper = corners.max(dim=0).values   # [3]
    centroid = corners.mean(dim=0)      # [3]
    extent = upper - lower              # [3]
    yaw = torch.zeros(1, device=corners.device)  # [1]
    return torch.cat((centroid, extent, yaw), dim=0)  # [7]

def custom_collate(batch):
    """Merge a list of sample dicts into one batch dict.

    - 'bbox3d': only the first box of each sample is used; its corners are reduced
      to a 7-parameter vector and the vectors are stacked.
    - any other key: values are stacked into one tensor when torch.stack accepts
      them; if it raises RuntimeError (e.g. differing sizes) they are kept as a
      plain list in sample order.
    """
    merged = {}
    for field in batch[0]:
        values = [sample[field] for sample in batch]
        if field == 'bbox3d':
            merged[field] = torch.stack(
                [convert_bbox_corners_to_params(boxes[0]) for boxes in values]
            )
            continue
        try:
            merged[field] = torch.stack(values)
        except RuntimeError:
            merged[field] = values
    return merged


# Wrap each split in a DataLoader. Only the training split is shuffled; all three
# use custom_collate so that samples of differing size can still be batched.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=custom_collate,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=custom_collate,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=custom_collate,
)


 Define the Model

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class Simple3DBBoxModel(nn.Module):
    """Two-branch regressor that fuses image and point-cloud features into 7 outputs.

    - RGB branch: a ResNet-18 with its final layer removed (512 features per image).
    - Point branch: a per-point MLP (3 -> 64 -> 128 -> 256) averaged over the points.
    - Head: MLP on the concatenated 768 features producing 7 values per sample.

    Args:
        pretrained: When True, load the ImageNet (IMAGENET1K_V1) weights for the
            ResNet-18; when False, the backbone is randomly initialised.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        # Honour the flag: previously the ImageNet weights were loaded regardless.
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        resnet = models.resnet18(weights=weights)
        self.rgb_backbone = nn.Sequential(*list(resnet.children())[:-1])  # [B, 512, 1, 1]

        self.pc_branch = nn.Sequential(
            nn.Linear(3, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
        )

        self.fc = nn.Sequential(
            nn.Linear(512 + 256, 256),
            nn.ReLU(),
            nn.Linear(256, 7)
        )

    def _encode_single_cloud(self, pc, device):
        """Return the mean per-point feature ([256]) for one point cloud."""
        pc = pc.to(device)
        original_shape = pc.shape
        if pc.dim() > 2:
            # Flatten all leading dims. The former `while dim > 2: squeeze(0)` loop
            # never terminated when the leading dimension was not of size 1.
            pc = pc.reshape(-1, pc.shape[-1])
        if pc.dim() != 2 or pc.shape[1] != 3:
            raise ValueError(f"Unexpected point cloud shape: {original_shape}")
        return self.pc_branch(pc).mean(dim=0)

    def forward(self, rgb, point_cloud):
        """Predict 7 values per sample.

        Args:
            rgb: Image batch fed to the ResNet backbone, [B, 3, H, W].
            point_cloud: Either a [B, N, 3] tensor, or a list/tuple with one
                point-cloud tensor per sample (each ending in a dimension of size 3).

        Returns:
            Tensor of shape [B, 7].

        Raises:
            ValueError: if a point cloud does not have the expected layout.
        """
        x_rgb = self.rgb_backbone(rgb)   # [B, 512, 1, 1]
        x_rgb = x_rgb.flatten(1)         # [B, 512]
        device_here = x_rgb.device

        if isinstance(point_cloud, (list, tuple)):
            # Variable-sized clouds: encode one at a time, then stack.
            pc_feats = torch.stack(
                [self._encode_single_cloud(pc, device_here) for pc in point_cloud],
                dim=0,
            )  # [B, 256]
        else:
            if point_cloud.dim() != 3:
                raise ValueError(f"Unexpected point cloud shape: {point_cloud.shape}")
            B, N, C = point_cloud.shape
            point_cloud = point_cloud.to(device_here)
            # reshape (not view) so non-contiguous inputs are accepted too.
            pc_flat = point_cloud.reshape(B * N, C)
            pc_feats = self.pc_branch(pc_flat).view(B, N, -1).mean(dim=1)  # [B, 256]

        fused = torch.cat([x_rgb, pc_feats], dim=1)  # [B, 768]
        return self.fc(fused)                         # [B, 7]


Define Loss & Training Routine

In [None]:
import torch.optim as optim
import torch.nn.functional as F

def bbox3d_loss(pred, target):
    """Smooth-L1 loss between predicted and target box parameters (default reduction)."""
    loss = F.smooth_l1_loss(input=pred, target=target)
    return loss

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over `loader` and return the mean per-batch loss.

    Trains (with gradient updates) when `optimizer` is given, otherwise evaluates
    with gradients disabled. Returns NaN when the loader yields no batches.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is equivalent to eval()
    total_loss = 0.0
    num_batches = 0
    with torch.set_grad_enabled(is_training):
        for batch in loader:
            rgb = batch['rgb'].to(device)
            # Leave point_cloud as-is: it may be a list of variable-sized tensors,
            # which the model moves to the right device itself.
            pc = batch['point_cloud']
            gt_bbox = batch['bbox3d']
            if isinstance(gt_bbox, (list, tuple)):
                # A list has no .to(); stack it into a single target tensor first.
                gt_bbox = torch.stack(list(gt_bbox))
            gt_bbox = gt_bbox.to(device)

            if is_training:
                optimizer.zero_grad()
            loss = criterion(model(rgb, pc), gt_bbox)
            if is_training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            num_batches += 1
    # Avoid ZeroDivisionError when a split turned out to be empty.
    return total_loss / num_batches if num_batches else float('nan')


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device='cuda'):
    """Train `model` for `num_epochs`, validating after every epoch.

    Args:
        model: Module called as ``model(rgb, point_cloud)``.
        train_loader: Iterable of batch dicts with keys 'rgb', 'point_cloud', 'bbox3d'.
        val_loader: Same format as `train_loader`; used for evaluation only.
        criterion: Callable ``criterion(pred, target)`` returning a scalar loss tensor.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of passes over `train_loader`.
        device: Device the model and batches are moved to.

    Returns:
        ``(train_losses, val_losses)``: two lists with one mean loss per epoch.
        An entry is NaN if the corresponding loader produced no batches.
    """
    model = model.to(device)
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        avg_train_loss = _run_epoch(model, train_loader, criterion, device, optimizer=optimizer)
        train_losses.append(avg_train_loss)

        avg_val_loss = _run_epoch(model, val_loader, criterion, device)
        val_losses.append(avg_val_loss)

        print(f"Epoch [{epoch+1}/{num_epochs}] | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    return train_losses, val_losses


Launch Training

In [10]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Using device:", device)

# Model, optimiser and loss used for this run.
model = Simple3DBBoxModel(pretrained=True)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = bbox3d_loss

# Train for 10 epochs and keep the per-epoch loss curves.
train_losses, val_losses = train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    num_epochs=10,
    device=device,
)


Using device: cuda


RuntimeError: CUDA error: uncorrectable ECC error encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
