# Main simulator loop

## Imports, config

In [4]:
import os
from pathlib import Path
from path_utils import use_path

base_dir = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()

# Use relative paths from the base directory
config_path = str(base_dir / "configs/datasets/nuplan/8cams_undistorted.yaml")
checkpoint_path = str(base_dir / "output/master-project/run_omnire_undistorted_8cams_0")
n_steps = 100

## Initialize simulator, environment model and the random agent

In [14]:
from simulator import Simulator, NuPlan
simulator = NuPlan()

Initializing NuPlan simulator...
<nuplan.common.actor_state.ego_state.EgoState object at 0x7fdcde268250>
NuPlan initialized.


In [36]:
import numpy as np
import torch

from sim_types import State
from environment_model import OmniReSetup

class OmniReModel:
    def __init__(self, setup: OmniReSetup):
        self.data_cfg = setup.data_cfg
        self.train_cfg = setup.train_cfg
        self.trainer = setup.trainer
        self.dataset = setup.dataset
        self.device = setup.device
        self.camera_matrix_cache = {}
    
    def render_single_frame(self, frame_data: dict) -> np.ndarray:
        """
        Render a single frame based on provided frame data.
        
        Args:
            frame_data (dict): Dictionary containing camera and image info for the frame
            
        Returns:
            np.ndarray: The rendered RGB image as a numpy array
        """
        with torch.no_grad():
            # Create copies of the dictionaries to avoid modifying originals
            cam_infos = {}
            image_infos = {}
            
            # Move camera info tensors to GPU
            for key, value in frame_data["cam_infos"].items():
                if isinstance(value, torch.Tensor):
                    cam_infos[key] = value.cuda(non_blocking=True)
                else:
                    cam_infos[key] = value
            
            # Move image info tensors to GPU
            for key, value in frame_data["image_infos"].items():
                if isinstance(value, torch.Tensor):
                    image_infos[key] = value.cuda(non_blocking=True)
                else:
                    image_infos[key] = value

            # Perform rendering
            outputs = self.trainer(
                image_infos=image_infos,
                camera_infos=cam_infos,
                novel_view=True
            )

            # Extract RGB image and return
            rgb = outputs["rgb"].cpu().numpy().clip(
                min=1.e-6, max=1-1.e-6
            )

            # If depth is needed, you can extract it too
            if "depth" in outputs:
                depth = outputs["depth"].cpu().numpy()
                return rgb, depth

            return rgb

    def get_sensor_output(self, state):
        """
        Generate sensor output (RGB image) for the given simulation state.
        
        Args:
            state (dict): Current state of the simulation containing:
                - camera_position (np.ndarray): 3D position of the camera
                - camera_rotation (np.ndarray): Rotation of the camera (e.g., quaternion)
                - vehicle_positions (dict): Dictionary mapping vehicle IDs to positions
                - vehicle_rotations (dict): Dictionary mapping vehicle IDs to rotations
                - timestamp (float): Current simulation time
                
        Returns:
            dict: Sensor outputs including rendered image
        """
        # Prepare frame data for rendering based on current state
        frame_data = self.prepare_frame_data(state)

        # Render the image
        rgb_image = self.render_single_frame(frame_data)

        # Create sensor output dictionary
        sensor_output = {
            "rgb_image": rgb_image,
            # Add other sensor outputs as needed
        }

        return sensor_output

    def prepare_frame_data(self, state: State):
        """
        Prepare the frame data needed for rendering based on simulation state.
        
        Args:
            state (dict): Current state of the simulation
            
        Returns:
            dict: Frame data dictionary with cam_infos and image_infos
        """
        # Extract camera information
        camera_position = torch.tensor([
            state.ego_pos.x,
            state.ego_pos.y,
            state.ego_pos.z
        ], dtype=torch.float32)
        
        # Assuming state.ego_pos.heading is a scalar representing the yaw angle
        heading = torch.tensor(state.ego_pos.heading)
        half_yaw = heading / 2
        camera_rotation = torch.tensor([
            0.0,                     # x (roll)
            0.0,                     # y (pitch)
            torch.sin(half_yaw),    # z
            torch.cos(half_yaw)     # w
        ])
        
        timestamp = state.timestamp.time_us

        # Get camera matrix
        c2w = self.compute_camera_matrix(camera_position, camera_rotation)
        
        # these are the intrinsics for the camera TODO: load from file
        # 1.545000000000000000e+03
        # 1.545000000000000000e+03
        # 9.600000000000000000e+02
        # 5.600000000000000000e+02
        # -3.561230000000000229e-01
        # 1.725450000000000039e-01
        # -2.129999999999999949e-03
        # 4.640000000000000027e-04
        # -5.231000000000000233e-02
        
        intrinsics = torch.tensor([
            1.545000000000000000e+03,  # fx
            1.545000000000000000e+03,  # fy
            9.600000000000000000e+02,  # cx
            5.600000000000000000e+02,  # cy
            -3.561230000000000229e-01, # skew_x
            1.725450000000000039e-01,  # skew_y
            -2.129999999999999949e-03, # skew_xy
            4.640000000000000027e-04,  # skew_yx
            -5.231000000000000233e-02   # skew_yy
        ], dtype=torch.float32, device=self.device)
        
        # Reshape intrinsics to match the expected input shape [3,3]
        intrinsics = intrinsics.view(3, 3).to(self.device)

        # Create camera information dictionary
        cam_infos = {
            "intrinsics": intrinsics,  # Placeholder for intrinsics
            "width": torch.tensor([1280], device=self.device),
            "height": torch.tensor([720], device=self.device),
            "camera_to_world": c2w,
            # Add other camera parameters required by your model
        }

        # Vehicle information
        vehicles = getattr(state, "vehicle_pos_list", {})
        
        # vehicles is on the form Position(x=664432.4584765462, y=3998282.198022293, z=0, heading=-1.5377766955975682
        
        vehicle_positions = {
            vehicle_id: np.array([vehicle.x, vehicle.y, vehicle.z], dtype=np.float32)
            for vehicle_id, vehicle in enumerate(vehicles)
        }
        vehicle_rotations = {
            vehicle_id: np.array([0.0, 0.0, np.sin(vehicle.heading / 2), np.cos(vehicle.heading / 2)], dtype=np.float32)
            for vehicle_id, vehicle in enumerate(vehicles)
        }

        # Create vehicle information for the renderer
        # This depends on how your renderer expects vehicle data
        # vehicle_data = {
        #     vehicle_id: {
        #         "position": torch.tensor(position, device=self.device),
        #         "rotation": torch.tensor(vehicle_rotations.get(vehicle_id, np.array([1.0, 0.0, 0.0, 0.0])), device=self.device),
        #     }
        #     for vehicle_id, position in vehicle_positions.items()
        # }
        
        vehicle_data = {
            vehicle_id: {
                "position": torch.tensor(position, device=self.device),
                "rotation": torch.tensor(vehicle_rotations.get(vehicle_id, np.array([1.0, 0.0, 0.0, 0.0])), device=self.device),
            }
            for vehicle_id, position in vehicle_positions.items()
        }

        # Create image information dictionary
        # This includes any additional data needed for rendering
        
        normalized_time = (timestamp - self.dataset.start_timestep) / (self.dataset.end_timestep - self.dataset.start_timestep)
        
        image_infos = {
            "timestamp": torch.tensor([timestamp], device=self.device),
            "vehicles": vehicle_data,
            "normed_time": torch.tensor([normalized_time], device=self.device),
            "img_idx": torch.tensor([0]),
            "viewdirs": self.compute_viewdirs(
                intrinsics=cam_infos["intrinsics"],
                H=cam_infos["height"],
                W=cam_infos["width"],
                device=self.device
            )
            # Add other required image information
        }

        return {
            "cam_infos": cam_infos,
            "image_infos": image_infos
        }
    
    def compute_viewdirs(self, intrinsics, H, W, device):
        """Returns (H, W, 3) ray directions in camera coordinates"""
        fx, fy = intrinsics[0, 0], intrinsics[1, 1]
        cx, cy = intrinsics[0, 2], intrinsics[1, 2]

        i, j = torch.meshgrid(
            torch.arange(W, device=device),
            torch.arange(H, device=device),
            indexing='xy'
        )
        dirs = torch.stack([
            (i - cx) / fx,
            (j - cy) / fy,
            torch.ones_like(i)
        ], dim=-1)  # [H, W, 3]
        dirs = dirs / dirs.norm(dim=-1, keepdim=True)  # Normalize
        return dirs

    def compute_camera_matrix(self, position, rotation):
        """
        Compute the camera-to-world transformation matrix from position and rotation.
        
        Args:
            position (np.ndarray): 3D position vector [x, y, z]
            rotation (np.ndarray): Rotation as quaternion [w, x, y, z]
            
        Returns:
            torch.Tensor: 4x4 camera-to-world transformation matrix
        """
        # Create a cache key for this position and rotation
        cache_key = (tuple(position), tuple(rotation))

        # Check if we've already computed this matrix
        if cache_key in self.camera_matrix_cache:
            return self.camera_matrix_cache[cache_key]

        # Convert position to tensor
        position_tensor = torch.tensor(position, dtype=torch.float32, device=self.device)

        # Initialize transformation matrix
        c2w = torch.eye(4, device=self.device)

        # Set translation component
        c2w[:3, 3] = position_tensor

        # Convert quaternion to rotation matrix
        # Assuming quaternion is [w, x, y, z]
        w, x, y, z = rotation

        # Construct rotation matrix from quaternion
        rot_matrix = torch.tensor([
            [1 - 2*y*y - 2*z*z, 2*x*y - 2*w*z, 2*x*z + 2*w*y],
            [2*x*y + 2*w*z, 1 - 2*x*x - 2*z*z, 2*y*z - 2*w*x],
            [2*x*z - 2*w*y, 2*y*z + 2*w*x, 1 - 2*x*x - 2*y*y]
        ], dtype=torch.float32, device=self.device)

        # Set rotation component of the transformation matrix
        c2w[:3, :3] = rot_matrix

        # Store in cache for future use
        self.camera_matrix_cache[cache_key] = c2w

        return c2w

    def update_vehicle_positions(self, state, vehicle_id, new_position, new_rotation):
        """
        Update the position and rotation of a specific vehicle in the state.
        
        Args:
            state (dict): Current simulation state
            vehicle_id (int): ID of the vehicle to update
            new_position (np.ndarray): New 3D position
            new_rotation (np.ndarray): New rotation (quaternion)
            
        Returns:
            dict: Updated state dictionary
        """
        # Create a copy of the state to avoid modifying the original
        updated_state = state.copy()

        # Initialize vehicle dictionaries if they don't exist
        if "vehicle_positions" not in updated_state:
            updated_state["vehicle_positions"] = {}
        if "vehicle_rotations" not in updated_state:
            updated_state["vehicle_rotations"] = {}

        # Update vehicle position and rotation
        updated_state["vehicle_positions"][vehicle_id] = new_position
        updated_state["vehicle_rotations"][vehicle_id] = new_rotation

        return updated_state

    def create_trajectory(self, start_position, end_position, num_steps):
        """
        Create a linear trajectory between two positions.
        
        Args:
            start_position (np.ndarray): Starting position [x, y, z]
            end_position (np.ndarray): Ending position [x, y, z]
            num_steps (int): Number of steps in the trajectory
            
        Returns:
            np.ndarray: Array of positions along the trajectory
        """
        return np.linspace(start_position, end_position, num_steps)

In [7]:
base_dir = Path.cwd()

setup = None

# Now use the context manager for clean path handling
with use_path("drivestudio", True):
    # Define paths relative to the drivestudio directory
    relative_config_path = "configs/datasets/nuplan/8cams_undistorted.yaml"
    relative_checkpoint_path = "output/master-project/run_omnire_undistorted_8cams_0"
    
    print(f"Working directory: {os.getcwd()}")
    print(f"Config path (relative to drivestudio): {relative_config_path}")
    print(f"Absolute config path: {os.path.abspath(relative_config_path)}")
    
    # Check if these files exist in this context
    if not os.path.exists(relative_config_path):
        print(f"ERROR: Config file not found at {os.path.abspath(relative_config_path)}")
    if not os.path.exists(relative_checkpoint_path):
        print(f"ERROR: Checkpoint directory not found at {os.path.abspath(relative_checkpoint_path)}")
    
    # Only initialize if files exist
    if os.path.exists(relative_config_path) and os.path.exists(relative_checkpoint_path):
        setup = OmniReSetup(relative_config_path, relative_checkpoint_path)
        print("Successfully initialized OmniRe environment model")
    else:
        print("Failed to initialize environment model due to missing files")

Added /cluster/home/larstond/master-project/drivestudio to sys.path
Changed working directory to /cluster/home/larstond/master-project/drivestudio
Working directory: /cluster/home/larstond/master-project/drivestudio
Config path (relative to drivestudio): configs/datasets/nuplan/8cams_undistorted.yaml
Absolute config path: /cluster/home/larstond/master-project/drivestudio/configs/datasets/nuplan/8cams_undistorted.yaml
Loading config from: configs/datasets/nuplan/8cams_undistorted.yaml
Loading checkpoint from: output/master-project/run_omnire_undistorted_8cams_0
Loading dataset...


Loading images:   1%|▏         | 4/300 [00:00<00:15, 18.73it/s]

undistorting rgb


Loading images: 100%|██████████| 300/300 [00:07<00:00, 38.62it/s]
Loading dynamic masks:   2%|▏         | 5/300 [00:00<00:06, 45.38it/s]

undistorting dynamic mask


Loading dynamic masks: 100%|██████████| 300/300 [00:03<00:00, 79.65it/s]
Loading human masks:   3%|▎         | 9/300 [00:00<00:03, 82.40it/s]

undistorting human mask


Loading human masks: 100%|██████████| 300/300 [00:03<00:00, 87.57it/s]
Loading vehicle masks:   3%|▎         | 9/300 [00:00<00:03, 86.44it/s]

undistorting vehicle mask


Loading vehicle masks: 100%|██████████| 300/300 [00:03<00:00, 83.66it/s]
Loading sky masks:   4%|▍         | 13/300 [00:00<00:02, 119.77it/s]

undistorting sky mask


Loading sky masks: 100%|██████████| 300/300 [00:02<00:00, 123.88it/s]
Loading images:   2%|▏         | 6/300 [00:00<00:05, 54.14it/s]

undistorting rgb


Loading images: 100%|██████████| 300/300 [00:06<00:00, 49.99it/s]
Loading dynamic masks:   3%|▎         | 8/300 [00:00<00:03, 73.91it/s]

undistorting dynamic mask


Loading dynamic masks: 100%|██████████| 300/300 [00:03<00:00, 85.37it/s]
Loading human masks:   6%|▌         | 18/300 [00:00<00:03, 88.16it/s]

undistorting human mask


Loading human masks: 100%|██████████| 300/300 [00:03<00:00, 87.10it/s]
Loading vehicle masks:   6%|▌         | 18/300 [00:00<00:03, 87.42it/s]

undistorting vehicle mask


Loading vehicle masks: 100%|██████████| 300/300 [00:03<00:00, 85.52it/s]
Loading sky masks:   5%|▍         | 14/300 [00:00<00:02, 132.43it/s]

undistorting sky mask


Loading sky masks: 100%|██████████| 300/300 [00:02<00:00, 125.38it/s]
Loading images:   3%|▎         | 9/300 [00:00<00:06, 45.62it/s]

undistorting rgb


Loading images: 100%|██████████| 300/300 [00:06<00:00, 49.96it/s]
Loading dynamic masks:   2%|▏         | 5/300 [00:00<00:05, 49.94it/s]

undistorting dynamic mask


Loading dynamic masks: 100%|██████████| 300/300 [00:03<00:00, 86.13it/s]
Loading human masks:   6%|▌         | 18/300 [00:00<00:03, 86.76it/s]

undistorting human mask


Loading human masks: 100%|██████████| 300/300 [00:03<00:00, 87.50it/s]
Loading vehicle masks:   6%|▌         | 18/300 [00:00<00:03, 88.45it/s]

undistorting vehicle mask


Loading vehicle masks: 100%|██████████| 300/300 [00:03<00:00, 87.47it/s]
Loading sky masks:   9%|▊         | 26/300 [00:00<00:02, 127.09it/s]

undistorting sky mask


Loading sky masks: 100%|██████████| 300/300 [00:02<00:00, 123.74it/s]
Loading images:   3%|▎         | 10/300 [00:00<00:06, 47.89it/s]

undistorting rgb


Loading images: 100%|██████████| 300/300 [00:06<00:00, 46.28it/s]
Loading dynamic masks:   6%|▌         | 17/300 [00:00<00:03, 81.62it/s]

undistorting dynamic mask


Loading dynamic masks: 100%|██████████| 300/300 [00:03<00:00, 85.42it/s]
Loading human masks:   5%|▌         | 15/300 [00:00<00:03, 72.83it/s]

undistorting human mask


Loading human masks: 100%|██████████| 300/300 [00:03<00:00, 86.65it/s]
Loading vehicle masks:   6%|▌         | 18/300 [00:00<00:03, 87.41it/s]

undistorting vehicle mask


Loading vehicle masks: 100%|██████████| 300/300 [00:03<00:00, 86.82it/s]
Loading sky masks:   7%|▋         | 22/300 [00:00<00:02, 110.49it/s]

undistorting sky mask


Loading sky masks: 100%|██████████| 300/300 [00:02<00:00, 126.24it/s]
Loading images:   3%|▎         | 10/300 [00:00<00:06, 47.86it/s]

undistorting rgb


Loading images: 100%|██████████| 300/300 [00:06<00:00, 46.27it/s]
Loading dynamic masks:   3%|▎         | 10/300 [00:00<00:03, 91.59it/s]

undistorting dynamic mask


Loading dynamic masks: 100%|██████████| 300/300 [00:03<00:00, 87.58it/s]
Loading human masks:   6%|▌         | 18/300 [00:00<00:03, 87.04it/s]

undistorting human mask


Loading human masks: 100%|██████████| 300/300 [00:03<00:00, 88.08it/s]
Loading vehicle masks:   3%|▎         | 9/300 [00:00<00:03, 84.88it/s]

undistorting vehicle mask


Loading vehicle masks: 100%|██████████| 300/300 [00:03<00:00, 86.20it/s]
Loading sky masks:   3%|▎         | 10/300 [00:00<00:02, 96.82it/s]

undistorting sky mask


Loading sky masks: 100%|██████████| 300/300 [00:02<00:00, 124.15it/s]
Loading images:   2%|▏         | 6/300 [00:00<00:09, 30.40it/s]

undistorting rgb


Loading images: 100%|██████████| 300/300 [00:06<00:00, 47.92it/s]
Loading dynamic masks:   2%|▏         | 6/300 [00:00<00:05, 54.89it/s]

undistorting dynamic mask


Loading dynamic masks: 100%|██████████| 300/300 [00:03<00:00, 82.36it/s]
Loading human masks:   6%|▌         | 18/300 [00:00<00:03, 86.58it/s]

undistorting human mask


Loading human masks: 100%|██████████| 300/300 [00:03<00:00, 87.17it/s]
Loading vehicle masks:   3%|▎         | 9/300 [00:00<00:03, 89.90it/s]

undistorting vehicle mask


Loading vehicle masks: 100%|██████████| 300/300 [00:03<00:00, 85.63it/s]
Loading sky masks:   4%|▍         | 13/300 [00:00<00:02, 126.63it/s]

undistorting sky mask


Loading sky masks: 100%|██████████| 300/300 [00:02<00:00, 123.62it/s]
Loading images:   2%|▏         | 6/300 [00:00<00:05, 51.05it/s]

undistorting rgb


Loading images: 100%|██████████| 300/300 [00:07<00:00, 42.07it/s]
Loading dynamic masks:   3%|▎         | 9/300 [00:00<00:03, 86.91it/s]

undistorting dynamic mask


Loading dynamic masks: 100%|██████████| 300/300 [00:03<00:00, 87.56it/s]
Loading human masks:   3%|▎         | 9/300 [00:00<00:03, 83.61it/s]

undistorting human mask


Loading human masks: 100%|██████████| 300/300 [00:03<00:00, 86.78it/s]
Loading vehicle masks:   3%|▎         | 9/300 [00:00<00:03, 86.01it/s]

undistorting vehicle mask


Loading vehicle masks: 100%|██████████| 300/300 [00:03<00:00, 88.07it/s]
Loading sky masks:   9%|▊         | 26/300 [00:00<00:02, 127.48it/s]

undistorting sky mask


Loading sky masks: 100%|██████████| 300/300 [00:02<00:00, 125.29it/s]
Loading images:   2%|▏         | 6/300 [00:00<00:05, 52.16it/s]

undistorting rgb


Loading images: 100%|██████████| 300/300 [00:06<00:00, 49.62it/s]
Loading dynamic masks:   2%|▏         | 5/300 [00:00<00:06, 47.38it/s]

undistorting dynamic mask


Loading dynamic masks: 100%|██████████| 300/300 [00:03<00:00, 78.72it/s]
Loading human masks:   6%|▌         | 18/300 [00:00<00:03, 87.47it/s]

undistorting human mask


Loading human masks: 100%|██████████| 300/300 [00:03<00:00, 87.48it/s]
Loading vehicle masks:   6%|▌         | 18/300 [00:00<00:03, 86.30it/s]

undistorting vehicle mask


Loading vehicle masks: 100%|██████████| 300/300 [00:03<00:00, 85.15it/s]
Loading sky masks:   3%|▎         | 8/300 [00:00<00:03, 75.68it/s]

undistorting sky mask


Loading sky masks: 100%|██████████| 300/300 [00:02<00:00, 107.35it/s]


End timestep: 300


Loading SMPL: 100%|██████████| 300/300 [00:01<00:00, 290.71it/s]
  lidar_points = torch.from_numpy(lidar_info[:, :3]).float()
Loading lidar: 100%|██████████| 300/300 [00:27<00:00, 10.96it/s]
Projecting lidar pts on images for camera CAM_F0: 100%|██████████| 300/300 [01:28<00:00,  3.38it/s]
Projecting lidar pts on images for camera CAM_L0: 100%|██████████| 300/300 [01:35<00:00,  3.14it/s]
Projecting lidar pts on images for camera CAM_R0: 100%|██████████| 300/300 [01:47<00:00,  2.80it/s]
Projecting lidar pts on images for camera CAM_L1: 100%|██████████| 300/300 [02:00<00:00,  2.49it/s]
Projecting lidar pts on images for camera CAM_R1: 100%|██████████| 300/300 [02:13<00:00,  2.25it/s]
Projecting lidar pts on images for camera CAM_L2: 100%|██████████| 300/300 [02:11<00:00,  2.27it/s]
Projecting lidar pts on images for camera CAM_R2: 100%|██████████| 300/300 [01:50<00:00,  2.71it/s]
Projecting lidar pts on images for camera CAM_B0: 100%|██████████| 300/300 [01:33<00:00,  3.20it/s]


Using predefined pose: da_pose
OmniRe environment model initialized with checkpoint: output/master-project/run_omnire_undistorted_8cams_0/config.yaml
Successfully initialized OmniRe environment model
Restored original sys.path
Restored working directory to /cluster/home/larstond/master-project


In [37]:
environment_model = OmniReModel(setup)

In [12]:
from agent import Agent, RandomAgent
agent = RandomAgent()

## Do the simulation loop

In [38]:

error_history = []

for _ in range(n_steps):
    state = simulator.get_state()
    sensor_output = environment_model.get_sensor_output(state)
    action = agent.get_action(sensor_output)
    simulator.do_action(action)
    error_history.append(simulator.get_state())


  position_tensor = torch.tensor(position, dtype=torch.float32, device=self.device)


TypeError: arange() received an invalid combination of arguments - got (Tensor, device=torch.device), but expected one of:
 * (Number end, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (Number start, Number end, *, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (Number start, Number end, Number step, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
