In [1]:
# Import all required libraries
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import Box
from nuscenes.utils.data_classes import RadarPointCloud
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(suppress=True, precision=4)  # suppress scientific notation, 4 decimal digits
import pandas as pd
from nuscenes.can_bus.can_bus_api import NuScenesCanBus
from pyquaternion import Quaternion
from nuscenes.utils.geometry_utils import view_points
import copy
import os
from scipy.spatial import cKDTree
import pandas as pd
import cv2
import torch

In [2]:
# Load the MiDaS monocular depth network from the PyTorch Hub (cached after first download).
model_type = "DPT_Large"  # or "MiDaS_small"
model = torch.hub.load("intel-isl/MiDaS", model_type)
model.eval()  # inference mode: the model is only used for prediction in this notebook

# Fetch the matching pre-processing transforms: DPT variants and the small model
# use different input pipelines, so pick the one that belongs to `model_type`.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
if model_type.startswith("DPT"):
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

Using cache found in C:\Users\dilip/.cache\torch\hub\intel-isl_MiDaS_master
  from .autonotebook import tqdm as notebook_tqdm
Using cache found in C:\Users\dilip/.cache\torch\hub\intel-isl_MiDaS_master


In [3]:
# Build the NuScenes handle used by every later cell to look up records
# (samples, sensor data, calibration, ego poses) and to resolve file paths.
nusc = NuScenes(
    version='v1.0-mini',
    dataroot='../data/sets/nuscenes',
    verbose=True,
)

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.454 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.


In [None]:
# Function to get sample details
def get_sample_details(sample_token):
    """
    Collect the front-camera and front-radar records belonging to one sample.

    Relies on the notebook-level `nusc` object for all lookups.

    Parameters:
        sample_token (str): Token of the nuScenes sample to look up.

    Returns:
        tuple: (sample, cam_token, cam_data, radar_token, radar_data, radar_pc, image_path)
            - sample: the 'sample' record.
            - cam_token / cam_data: CAM_FRONT token and its 'sample_data' record.
            - radar_token / radar_data: RADAR_FRONT token and its 'sample_data' record.
            - radar_pc: RadarPointCloud loaded from the radar file.
            - image_path: path of the camera image under `nusc.dataroot`.
    """
    sample = nusc.get('sample', sample_token)
    sensor_tokens = sample['data']

    # Front camera record
    cam_token = sensor_tokens['CAM_FRONT']
    cam_data = nusc.get('sample_data', cam_token)

    # Front radar record
    radar_token = sensor_tokens['RADAR_FRONT']
    radar_data = nusc.get('sample_data', radar_token)

    # Both filenames are stored relative to the dataset root.
    radar_file = os.path.join(nusc.dataroot, radar_data['filename'])
    radar_pc = RadarPointCloud.from_file(radar_file)
    image_path = os.path.join(nusc.dataroot, cam_data['filename'])

    return (sample, cam_token, cam_data, radar_token, radar_data, radar_pc, image_path)

In [None]:
# Function to transform radar points to camera frame and project onto image plane
def get_transformed_data(radar_data, cam_data, radar_pc=None):
    """
    Map a radar sweep into the global frame, the camera frame and the image plane.

    The chain is: radar sensor -> ego vehicle (radar timestamp) -> global ->
    ego vehicle (camera timestamp) -> camera -> image plane.

    Relies on the notebook-level `nusc` object for calibration and pose lookups.

    Parameters:
        radar_data (dict): 'sample_data' record of the radar sweep.
        cam_data (dict): 'sample_data' record of the camera image.
        radar_pc (RadarPointCloud, optional): Point cloud of the sweep in the radar
            sensor frame. It is never modified. When omitted, the sweep is loaded
            from `radar_data['filename']`, so the function no longer depends on a
            global variable named `radar_pc` being set by the caller.

    Returns:
        tuple: (radar_pc_s2, radar_pc_s4, points_im)
            - radar_pc_s2: copy of the point cloud expressed in the global frame.
            - radar_pc_s4: copy of the point cloud expressed in the camera frame.
            - points_im: result of `view_points(..., normalize=True)` applied to the
              camera-frame xyz rows with the camera intrinsic matrix.
    """
    if radar_pc is None:
        # Self-contained fallback: read the sweep this record points to.
        radar_pc = RadarPointCloud.from_file(os.path.join(nusc.dataroot, radar_data['filename']))

    radar_cs = nusc.get('calibrated_sensor', radar_data['calibrated_sensor_token'])
    radar_pose = nusc.get('ego_pose', radar_data['ego_pose_token'])
    cam_pose = nusc.get('ego_pose', cam_data['ego_pose_token'])
    cam_cs = nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token'])

    # rotate()/translate() are applied to a copy so the caller's cloud is untouched.
    # Only the global-frame and camera-frame clouds are returned, so two copies suffice.
    radar_pc_s2 = copy.deepcopy(radar_pc)

    # First step: radar sensor frame -> ego vehicle frame at the sweep timestamp.
    radar_pc_s2.rotate(Quaternion(radar_cs['rotation']).rotation_matrix)
    radar_pc_s2.translate(np.array(radar_cs['translation']))

    # Second step: ego vehicle frame -> global frame.
    radar_pc_s2.rotate(Quaternion(radar_pose['rotation']).rotation_matrix)
    radar_pc_s2.translate(np.array(radar_pose['translation']))

    # Continue from a fresh copy so the global-frame result stays intact.
    radar_pc_s4 = copy.deepcopy(radar_pc_s2)

    # Third step: global frame -> ego vehicle frame at the image timestamp
    # (inverse transform: undo the translation first, then the rotation).
    radar_pc_s4.translate(-np.array(cam_pose['translation']))
    radar_pc_s4.rotate(Quaternion(cam_pose['rotation']).rotation_matrix.T)

    # Fourth step: ego vehicle frame -> camera frame (inverse of the camera extrinsics).
    radar_pc_s4.translate(-np.array(cam_cs['translation']))
    radar_pc_s4.rotate(Quaternion(cam_cs['rotation']).rotation_matrix.T)

    # Project the camera-frame xyz coordinates with the camera intrinsics.
    points_im = view_points(radar_pc_s4.points[:3, :], np.array(cam_cs['camera_intrinsic']), normalize=True)

    return radar_pc_s2, radar_pc_s4, points_im

In [None]:
# Function to compute depth at given image coordinates using MiDaS
def get_depth_at_coords(img_filename, image_coords, model, transform):
    """
    Sample the MiDaS prediction of an image at a set of pixel locations.

    Parameters:
        img_filename (str): Path to the image file.
        image_coords (np.ndarray): Array whose first two columns are (x, y) pixel
            coordinates, one row per query point.
        model (torch.nn.Module): Preloaded MiDaS model.
        transform (callable): MiDaS transform function matching `model`.

    Returns:
        np.ndarray: Predicted value at each query point, shape (N,).

    Raises:
        FileNotFoundError: If the image cannot be read.

    Notes:
        Coordinates are truncated to integers and clamped to the image bounds, so a
        point that falls outside the image yields the value of the nearest border pixel.
        NOTE(review): per the MiDaS documentation the network output is a relative
        inverse depth, not a metric distance — confirm before comparing it with radar range.
    """
    # Read the image (OpenCV yields BGR) and switch to the RGB order used for the transform.
    bgr_image = cv2.imread(img_filename)
    if bgr_image is None:
        raise FileNotFoundError(f"Image not found: {img_filename}")
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Pre-process and run the network without tracking gradients.
    net_input = transform(rgb_image)
    with torch.no_grad():
        raw_prediction = model(net_input)
        raw_map = raw_prediction.squeeze().cpu().numpy()

    # Bring the prediction back to the resolution of the source image.
    img_height, img_width = bgr_image.shape[0], bgr_image.shape[1]
    depth_map = cv2.resize(raw_map, (img_width, img_height))

    # Integer pixel indices, clamped so that every lookup is valid.
    last_row = depth_map.shape[0] - 1
    last_col = depth_map.shape[1] - 1
    cols = np.clip(image_coords[:, 0].astype(int), 0, last_col)
    rows = np.clip(image_coords[:, 1].astype(int), 0, last_row)

    return depth_map[rows, cols]

In [None]:
# Function to compute depth and local patches at given image coordinates using MiDaS
import numpy as np
import cv2
import torch

def get_depth_patch_at_coords(img_filename, image_coords, model, transform, patch_size=17):
    """
    Computes depth values and local patches at specified image coordinates using MiDaS.

    Parameters:
        img_filename (str): Path to the image file.
        image_coords (np.ndarray): Array whose first two columns are (x, y) pixel
            coordinates, one row per query point.
        model (torch.nn.Module): Preloaded MiDaS model.
        transform (callable): MiDaS transform function.
        patch_size (int): Side length of the square patch (default: 17). Must be >= 1.

    Returns:
        Tuple[np.ndarray, np.ndarray]:
            - Predicted values at the given coordinates, shape (N,)
            - Patches around each coordinate, shape (N, patch_size, patch_size).
              The queried pixel sits at index [patch_size // 2, patch_size // 2] of
              its patch; cells that fall outside the image are NaN.

    Raises:
        ValueError: If `patch_size` is smaller than 1.
        FileNotFoundError: If the image cannot be read.

    Notes:
        Coordinates are truncated to integers and clamped to the image bounds, so a
        point outside the image is sampled at the nearest border pixel.
        NOTE(review): per the MiDaS documentation the network output is a relative
        inverse depth, not a metric distance — confirm before comparing it with radar range.
    """
    if patch_size < 1:
        raise ValueError(f"patch_size must be >= 1, got {patch_size}")

    # Load and convert image
    img_cv = cv2.imread(img_filename)
    if img_cv is None:
        raise FileNotFoundError(f"Image not found: {img_filename}")
    img_rgb = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)

    # Apply MiDaS transform
    input_tensor = transform(img_rgb)

    # Predict depth
    with torch.no_grad():
        prediction = model(input_tensor)
        depth = prediction.squeeze().cpu().numpy()

    # Resize depth to match original image
    depth_resized = cv2.resize(depth, (img_cv.shape[1], img_cv.shape[0]))
    h, w = depth_resized.shape

    # Integer pixel indices, clamped so that every centre lies inside the image.
    cols = np.clip(image_coords[:, 0].astype(int), 0, w - 1)
    rows = np.clip(image_coords[:, 1].astype(int), 0, h - 1)

    # Single value per query point (float64, as before).
    depth_values = depth_resized[rows, cols].astype(np.float64)

    # Surround the map with a NaN border once, so that every window has the full
    # patch size and the queried pixel keeps the same position inside its patch.
    # Padding only on the right/bottom (as done previously) shifted the centre for
    # points near the left/top border, and an even patch_size produced a window one
    # pixel too large, which made np.pad fail with a negative pad width.
    before = patch_size // 2
    after = patch_size - 1 - before
    depth_padded = np.pad(
        depth_resized,
        ((before, after), (before, after)),
        mode='constant',
        constant_values=np.nan,
    )

    num_points = image_coords.shape[0]
    depth_patches = np.zeros((num_points, patch_size, patch_size))
    for i, (row, col) in enumerate(zip(rows, cols)):
        # Pixel (row, col) of the image is (row + before, col + before) in the padded
        # map, so the window that starts at (row, col) is centred on it.
        depth_patches[i] = depth_padded[row:row + patch_size, col:col + patch_size]

    return depth_values, depth_patches

In [None]:
# Main processing loop
#
# Walks every sample of every scene, projects the front-radar points into the front
# camera image, samples the MiDaS prediction at each projected point and collects one
# row per radar point. The rows are turned into the DataFrame `df` at the end.

# Names of the radar channels stored after x, y, z in RadarPointCloud.points
# (columns 3..17 of the transposed array), in storage order.
radar_attribute_names = [
    'dyn_prop', 'cluster_id', 'rcs', 'vx', 'vy', 'vx_comp', 'vy_comp',
    'is_quality_valid', 'ambig_state', 'x_rms', 'y_rms', 'invalid_state',
    'pdh0', 'vx_rms', 'vy_rms',
]

data_rows = []
for scene in nusc.scene:
    sample_token = scene['first_sample_token']

    # The 'next' field of the last sample is empty, which ends the walk.
    while sample_token:

        # NOTE: the name `radar_pc` is kept on purpose — get_transformed_data may
        # read it from the notebook namespace.
        sample, cam_token, cam_data, radar_token, radar_data, radar_pc, image_path = get_sample_details(sample_token)
        radar_pc_s2, radar_pc_s4, points_im = get_transformed_data(radar_data, cam_data)

        # xyz in the radar sensor frame, shape: (N_points, 3)
        radar_pc_subset = radar_pc.points.T[:, :3]

        # xyz in the global frame, shape: (N_points, 3)
        radar_s2_subset = radar_pc_s2.points.T[:, :3]

        # All channels in the camera frame, shape: (N_points, N_features)
        radar_s4_all = radar_pc_s4.points.T

        # Projected points, one row per radar point; the first two columns are pixel x, y
        points_im_T = points_im.T

        num_points = radar_s4_all.shape[0]

        image_coords = points_im_T[:, :2]  # shape: (N_points, 2)
        depth_values, depth_patches = get_depth_patch_at_coords(image_path, image_coords, model, transform)

        # The depth lookup clamps coordinates to the image, so a point that projects
        # outside the picture (or lies behind the camera) still receives a value taken
        # from a border pixel. Flag the rows whose projection is genuinely inside the
        # image so that they can be told apart downstream.
        in_image = (
            (radar_s4_all[:, 2] > 0)
            & (image_coords[:, 0] >= 0) & (image_coords[:, 0] < cam_data['width'])
            & (image_coords[:, 1] >= 0) & (image_coords[:, 1] < cam_data['height'])
        )

        print(f"{sample_token} - {num_points}")

        for i in range(num_points):
            row = {
                'sample_token': sample_token,
                'x_global': radar_s2_subset[i, 0],
                'y_global': radar_s2_subset[i, 1],
                'z_global': radar_s2_subset[i, 2],
                'radar_token': radar_token,
                'radar_x': radar_pc_subset[i, 0],
                'radar_y': radar_pc_subset[i, 1],
                'radar_z': radar_pc_subset[i, 2],
                'radar_cam_x': radar_s4_all[i, 0],
                'radar_cam_y': radar_s4_all[i, 1],
                'radar_cam_z': radar_s4_all[i, 2],
            }

            # Remaining radar channels: column 3 onwards, same order as the name list.
            for column, name in enumerate(radar_attribute_names, start=3):
                row[name] = radar_s4_all[i, column]

            row['image_coord_x'] = points_im_T[i, 0]
            row['image_coord_y'] = points_im_T[i, 1]
            row['depth'] = depth_values[i]
            row['depth_patch'] = depth_patches[i]
            # Additional column; all columns above keep their original names and order.
            row['in_image'] = bool(in_image[i])

            data_rows.append(row)

        sample_token = sample['next']

# Convert to DataFrame
df = pd.DataFrame(data_rows)

ca9a282c9e77460f8360f564131a8af5 - 74
39586f9d59004284a7114a68825e8eec - 66
356d81f38dd9473ba590f39e266f54e5 - 67
e0845f5322254dafadbbed75aaa07969 - 68
c923fe08b2ff4e27975d2bf30934383b - 71
f1e3d9d08f044c439ce86a2d6fcca57b - 68
4f545737bf3347fbbc9af60b0be9a963 - 70
7626dde27d604ac28a0240bdd54eba7a - 77
be99ffc878b24aca8956bbb4e0f97d0c - 69
9813c23a5f1448b09bb7910fea9baf20 - 62
023c4df2d451409881d8e6ea82f14704 - 69
c235638ed66145988d17f9d0601923f2 - 77
bc3c8a953f6b4dcdb77b521d89f3d9d5 - 73
1e3d79dae62742a0ad64c91679863358 - 76
2afb9d32310e4546a71cbe432911eca2 - 68
cd21dbfc3bd749c7b10a5c42562e0c42 - 68
88449a5cb1644a199c1c11f6ac034867 - 61
2ff86dc19c4644a1a88ce5ba848f56e5 - 64
bf2938e43c6f487497cda76b51bfc406 - 63
b26e791522294bec90f86fd72226e35c - 63
c844bf5a9f2243ff8f4bf2c85fe218ff - 58
fedfb3a6cb804635a0f47143f9ca8d6f - 63
965f6af5a92449348409029a5f048a38 - 61
4711bcd34644420da8bc77163431888e - 56
a34fabc7aa674713b71f98ec541eb2d4 - 61
c78067571d104caba7c568a847d56971 - 65
4246e57f0187

In [None]:
# Write the fused radar / image-depth table to an Excel workbook (row index omitted).
# NOTE(review): the 'depth_patch' column holds one NumPy array per row — check how these
# cells end up in the spreadsheet; a binary format may be needed to keep them lossless.
excel_output_path = 'PCD_ImageDepthFusion.xlsx'
df.to_excel(excel_output_path, index=False)