In [1]:
import habitat_sim
import random
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import quaternion
import os

In [2]:
# Optional alternative: enumerate every MP3D scene under a dataset root instead
# of loading one fixed file. Uncomment and adjust path_to_mp3d to use it.
# path_to_mp3d = "path/to/habitat-api/data/scene_datasets/mp3d/"
# folder_content = os.listdir(path_to_mp3d)
# scene_paths = []
# for content in folder_content:
#     full_path = path_to_mp3d + content
#     if os.path.isdir(full_path):
#         full_path = full_path + "/" + content + ".glb"
#         # print(full_path)
#         scene_paths.append(full_path)
#
# print("Total number of scenes:", len(scene_paths))
# test_scene = scene_paths[1]
# print("Scene to be used:", test_scene)

# Scene file handed to the simulator. Set the HABITAT_TEST_SCENE environment
# variable to run the notebook against a different scene without editing this
# cell; when it is unset the original author's local path is used.
test_scene = os.environ.get(
    "HABITAT_TEST_SCENE",
    "/home/cengerkin/Desktop/habitat-full/habitat-api/data/scene_datasets/mp3d/testing/new/exp/exp1_semantic.ply",
)

# Settings consumed by make_cfg() and by the cells that seed / pick the agent.
sim_settings = {
    "width": 256,  # Spatial resolution of the observations
    "height": 256,
    "scene": test_scene,  # Scene path
    "default_agent": 0,  # Index of the agent that is initialized and controlled
    "sensor_height": 0,  # Height of sensors in meters
    "color_sensor": True,  # RGB sensor
    "semantic_sensor": True,  # Semantic sensor
    "depth_sensor": True,  # Depth sensor
    "seed": 1,  # Passed to random.seed() and sim.seed()
}

# Simulator config

In [3]:
def make_cfg(settings):
    """Build a ``habitat_sim.Configuration`` with one agent from ``settings``.

    Args:
        settings: mapping that provides ``"scene"``, ``"height"``, ``"width"``,
            ``"sensor_height"`` and one truthy/falsy flag per sensor uuid
            (``"color_sensor"``, ``"depth_sensor"``, ``"semantic_sensor"``).

    Returns:
        ``habitat_sim.Configuration`` wrapping the simulator config and a
        single agent config carrying the enabled sensors and the action space.
    """
    sim_cfg = habitat_sim.SimulatorConfiguration()
    sim_cfg.gpu_device_id = 0
    sim_cfg.scene.id = settings["scene"]

    # Note: all sensors must have the same resolution, so the shared values
    # are read once and reused for every sensor.
    img_height, img_width = settings["height"], settings["width"]
    mount_height = settings["sensor_height"]

    sensor_kinds = (
        ("color_sensor", habitat_sim.SensorType.COLOR),
        ("depth_sensor", habitat_sim.SensorType.DEPTH),
        ("semantic_sensor", habitat_sim.SensorType.SEMANTIC),
    )

    sensor_specs = []
    for uuid, kind in sensor_kinds:
        # Sensors whose flag is falsy in the settings are left out entirely.
        if not settings[uuid]:
            continue
        spec = habitat_sim.SensorSpec()
        spec.uuid = uuid
        spec.sensor_type = kind
        spec.resolution = [img_height, img_width]
        spec.position = [0.0, mount_height, 0.0]
        sensor_specs.append(spec)

    # Displacement per move action and angle per turn/look action
    # (angle unit is presumably degrees — TODO confirm against habitat_sim).
    actuation_amounts = (
        ("move_forward", 0.25),
        ("move_backward", 0.25),
        ("move_right", 0.25),
        ("move_left", 0.25),
        ("turn_left", 15.0),
        ("turn_right", 15.0),
        ("look_up", 1),
        ("look_down", 1),
    )

    agent_cfg = habitat_sim.agent.AgentConfiguration()
    agent_cfg.sensor_specifications = sensor_specs
    agent_cfg.action_space = {
        name: habitat_sim.agent.ActionSpec(
            name, habitat_sim.agent.ActuationSpec(amount=amount)
        )
        for name, amount in actuation_amounts
    }

    return habitat_sim.Configuration(sim_cfg, [agent_cfg])

# Keep the configuration around: later cells read cfg.agents to list actions.
cfg = make_cfg(sim_settings)

# When this cell is re-run, shut the previous simulator down before building a
# new one so two simulators are never alive in the same kernel at once.
try:
    sim.close()
except NameError:
    # First execution in this kernel: there is no earlier simulator to close.
    pass
sim = habitat_sim.Simulator(cfg)

W0806 23:35:37.690544 29661 simulator.py:167] Could not find navmesh /home/cengerkin/Desktop/habitat-full/habitat-api/data/scene_datasets/mp3d/testing/new/exp/exp1_semantic.navmesh, no collision checking will be done


# Scene semantic annotations

In [4]:
def print_scene_recur(scene, limit_output=10):
    """Print the semantic annotation hierarchy: levels, regions, then objects.

    A two-line summary of the whole scene is printed first. Printing stops as
    soon as ``limit_output`` objects have been printed; level and region lines
    do not count towards that limit.

    Args:
        scene: object exposing ``levels``, ``regions``, ``objects`` and ``aabb``
            as read below (the simulator's semantic scene in this notebook).
        limit_output: number of object lines after which the function returns.

    Returns:
        None.
    """
    print(f"House has {len(scene.levels)} levels, {len(scene.regions)} regions and {len(scene.objects)} objects")
    print(f"House center:{scene.aabb.center} dims:{scene.aabb.sizes}")

    # Count down the object budget instead of counting printed objects up.
    remaining = limit_output
    for level in scene.levels:
        level_line = (
            f"Level id:{level.id}, center:{level.aabb.center},"
            f" dims:{level.aabb.sizes}"
        )
        print(level_line)
        for region in level.regions:
            region_line = (
                f"Region id:{region.id}, category:{region.category.name()},"
                f" center:{region.aabb.center}, dims:{region.aabb.sizes}"
            )
            print(region_line)
            for obj in region.objects:
                object_line = (
                    f"Object id:{obj.id}, category:{obj.category.name()},"
                    f" center:{obj.aabb.center}, dims:{obj.aabb.sizes}"
                )
                print(object_line)
                remaining -= 1
                if remaining <= 0:
                    # Budget exhausted: abandon all three loops at once.
                    return

# Dump the scene's semantic annotations (id, category and bounding box) for
# levels, then their regions, then the regions' objects, stopping after the
# first 10 objects.
scene = sim.semantic_scene
print_scene_recur(scene, limit_output=10)

House has 0 levels, 0 regions and 0 objects
House center:[0. 0. 0.] dims:[-inf -inf -inf]


In [5]:
# Seed Python's RNG (used later by random.choice) and the simulator with the
# same configured value.
random.seed(sim_settings["seed"])
sim.seed(sim_settings["seed"])

# Set agent state: create the default agent and place it at a fixed start
# position. The rotation is not assigned here (the line below is commented
# out), so whatever habitat_sim.AgentState() defaults to is used.
agent = sim.initialize_agent(sim_settings["default_agent"])
agent_state = habitat_sim.AgentState()
agent_state.position = np.array([-1.3634586334228516, 4.651396751403809, 7.744606018066406]) # TODO: Change here as desired
# agent_state.rotation = quaternion.quaternion(1, 0, 0, 0) # TODO: Change here as desired
agent.set_state(agent_state)

# Get agent state: read it back from the agent (rebinding agent_state) and
# print the pose that is actually in effect.
agent_state = agent.get_state()
print("agent_state: position", agent_state.position, "rotation", agent_state.rotation)

agent_state: position [-1.3634586  4.6513968  7.744606 ] rotation quaternion(1, 0, 0, 0)


In [6]:
from PIL import Image
from habitat_sim.utils.common import d3_40_colors_rgb

def display_sample(rgb_obs, semantic_obs, depth_obs, max_depth=10.0):
    """Plot the RGB, semantic and depth observations side by side.

    Args:
        rgb_obs: image array interpreted as RGBA by PIL
            (assumed uint8 of shape (H, W, 4) — TODO confirm).
        semantic_obs: 2-D array of semantic ids; ids are folded modulo 40 onto
            the ``d3_40_colors_rgb`` palette.
        depth_obs: 2-D array of depth readings
            (presumably meters — TODO confirm against the depth sensor).
        max_depth: depth that maps to the brightest grey level (255). Readings
            beyond it are clipped instead of wrapping around in uint8. The
            default reproduces the original fixed divisor of 10.
    """
    rgb_img = Image.fromarray(rgb_obs, mode="RGBA")

    semantic_img = Image.new("P", (semantic_obs.shape[1], semantic_obs.shape[0]))
    semantic_img.putpalette(d3_40_colors_rgb.flatten())
    semantic_img.putdata((semantic_obs.flatten() % 40).astype(np.uint8))
    semantic_img = semantic_img.convert("RGBA")

    # Normalise to [0, 1] before the uint8 cast. Without the clip, readings
    # above max_depth overflow the 0..255 range (e.g. 10.1 -> 257 -> 1), and
    # NaN/inf have no defined uint8 value at all.
    depth_unit = np.clip(np.nan_to_num(depth_obs / max_depth), 0.0, 1.0)
    depth_img = Image.fromarray((depth_unit * 255).astype(np.uint8), mode="L")

    panels = (
        ("rgb", rgb_img),
        ("semantic", semantic_img),
        ("depth", depth_img),
    )
    plt.figure(figsize=(12, 8))
    for index, (title, image) in enumerate(panels, start=1):
        ax = plt.subplot(1, 3, index)
        ax.axis('off')
        ax.set_title(title)
        plt.imshow(image)
    plt.show()
    
def get_camera_matrices(position, rotation):
    """Build the 4x4 transforms between the Habitat world frame and the camera.

    Args:
        position: 3-vector written into the translation column of the pose.
        rotation: quaternion accepted by ``quaternion.as_rotation_matrix``.

    Returns:
        Tuple ``(P, Pinv)``: ``Pinv`` is the agent/camera pose with respect to
        the Habitat world coordinate system, and ``P`` is its matrix inverse
        (the Habitat world coordinate system with respect to the agent/camera).
    """
    rot_matrix = quaternion.as_rotation_matrix(rotation)

    # Homogeneous pose: rotation in the upper-left 3x3, translation in the
    # last column, bottom row left as [0, 0, 0, 1] from the identity.
    camera_pose = np.eye(4)
    camera_pose[:3, :3] = rot_matrix
    camera_pose[:3, 3] = position

    world_to_camera = np.linalg.inv(camera_pose)
    return world_to_camera, camera_pose


# GUI with PyQt5

In [7]:
import sys

from PyQt5.QtWidgets import *
from PyQt5 import QtGui
from PyQt5.QtGui import QPixmap
from PyQt5.QtCore import Qt

from PIL.ImageQt import ImageQt

# Names of every action registered for the default agent.
action_names = list(cfg.agents[sim_settings["default_agent"]].action_space)

# Qt key code -> name of the simulator action it triggers in the viewer window.
action_map = {
    # Digit keys rotate the agent / tilt the view.
    Qt.Key_4: "turn_left",
    Qt.Key_6: "turn_right",
    Qt.Key_8: "look_up",
    Qt.Key_5: "look_down",
    # W/A/S/D translate the agent.
    Qt.Key_W: "move_forward",
    Qt.Key_A: "move_left",
    Qt.Key_S: "move_backward",
    Qt.Key_D: "move_right",
}

class MainWindow(QWidget):
    """Keyboard-driven viewer for the notebook-level ``sim`` and ``agent``.

    Three image panels (RGB, semantic, depth) sit above a read-only text log of
    the agent pose. Keys bound in ``action_map`` step the simulator and refresh
    the panels, ``C`` clears the log and ``Esc`` closes the window. Any other
    key is handed to the default ``QWidget`` handling.
    """

    # Depth mapped to the brightest grey level in the depth panel; larger
    # readings are clipped (unit presumably meters — TODO confirm).
    MAX_DEPTH = 10.0

    # Copy of the key bindings taken when the class is defined. A later cell
    # rebinds the global name ``action_map`` to a table keyed by ASCII codes;
    # the copy keeps this window's bindings unaffected by that.
    key_actions = dict(action_map)

    def __init__(self):
        super().__init__()
        self.initialize()

    @staticmethod
    def _to_pixmap(pil_image):
        """Convert a PIL image into a QPixmap that a QLabel can display."""
        return QPixmap.fromImage(ImageQt(pil_image))

    def get_images(self, observations):
        """Turn a sensor observation dict into three QPixmaps.

        Args:
            observations: mapping with ``"color_sensor"``, ``"semantic_sensor"``
                and ``"depth_sensor"`` arrays, as returned by the simulator.

        Returns:
            Tuple ``(rgb, semantic, depth)`` of QPixmaps.
        """
        rgb_img = Image.fromarray(observations["color_sensor"], mode="RGBA")

        # Semantic ids are folded modulo 40 onto the d3_40_colors_rgb palette.
        sem = observations["semantic_sensor"]
        sem_img = Image.new("P", (sem.shape[1], sem.shape[0]))
        sem_img.putpalette(d3_40_colors_rgb.flatten())
        sem_img.putdata((sem.flatten() % 40).astype(np.uint8))
        sem_img = sem_img.convert("RGBA")

        # Normalise to [0, 1] before the uint8 cast so readings beyond
        # MAX_DEPTH saturate instead of wrapping around, and NaN/inf become
        # valid pixel values.
        depth_unit = np.clip(
            np.nan_to_num(observations["depth_sensor"] / self.MAX_DEPTH), 0.0, 1.0
        )
        dep_img = Image.fromarray((depth_unit * 255).astype(np.uint8), mode="L")

        return (
            self._to_pixmap(rgb_img),
            self._to_pixmap(sem_img),
            self._to_pixmap(dep_img),
        )

    def _show_observations(self, observations):
        """Render ``observations`` into the three image panels."""
        rgb, seg, dep = self.get_images(observations)
        self.rgb_panel.setPixmap(rgb)
        self.seg_panel.setPixmap(seg)
        self.dep_panel.setPixmap(dep)

    def initialize(self):
        """Build the widget tree, show the current view and open the window."""
        self.title = "Habitat Agent"
        self.top = 0
        self.left = 0
        # Stored under win_* names: assigning to self.width / self.height
        # would replace the QWidget.width() / QWidget.height() methods on this
        # instance.
        self.win_width = 256*3
        self.win_height = 456
        self.timestep = 0

        hbox = QHBoxLayout()

        rgb_panel = QFrame()
        rgb_panel.setFrameShape(QFrame.StyledPanel)
        self.rgb_panel = QLabel(rgb_panel)

        seg_panel = QFrame()
        seg_panel.setFrameShape(QFrame.StyledPanel)
        self.seg_panel = QLabel(seg_panel)

        dep_panel = QFrame()
        dep_panel.setFrameShape(QFrame.StyledPanel)
        self.dep_panel = QLabel(dep_panel)

        self.info_panel = info_panel = QPlainTextEdit()
        info_panel.setReadOnly(True)

        # Layout: [[rgb | semantic] | depth] stacked above the log panel.
        split1 = QSplitter(Qt.Horizontal)
        split1.addWidget(rgb_panel)
        split1.addWidget(seg_panel)
        split1.setSizes([256,256])

        split2 = QSplitter(Qt.Horizontal)
        split2.addWidget(split1)
        split2.addWidget(dep_panel)
        split2.setSizes([512,256])

        split3 = QSplitter(Qt.Vertical)
        split3.addWidget(split2)
        split3.addWidget(info_panel)
        split3.setSizes([256,200])
        hbox.addWidget(split3)

        # Render the current view (no step taken) and log the starting pose.
        self._show_observations(sim.get_sensor_observations())
        agent_state = agent.get_state()
        log = "t: {}, Position: {}, Orientation: {}".format(self.timestep, agent_state.position, agent_state.rotation)
        self.info_panel.appendPlainText(log)

        self.setLayout(hbox)
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.win_width, self.win_height)

        self.show()

    def keyPressEvent(self, event):
        """Handle a key press: clear the log, close, or step the simulator."""
        key = event.key()
        if key == Qt.Key_C:
            self.info_panel.clear()
            return
        if key == Qt.Key_Escape:
            self.close()
            return

        action = self.key_actions.get(key)
        if action is None:
            # Unbound key (Shift, Tab, ...): fall back to the default handling
            # instead of raising KeyError inside the Qt event handler.
            super().keyPressEvent(event)
            return

        observations = sim.step(action)
        self.timestep += 1

        self._show_observations(observations)
        agent_state = agent.get_state()
        log = "t:{}, Position: {}, Orientation: {}".format(self.timestep, agent_state.position, agent_state.rotation)
        self.info_panel.appendPlainText(log)

#             agent_state = agent.get_state()
#             to_cam, to_habitat = get_camera_matrices(agent_state.position, agent_state.rotation)
#             print("Agent state, Position:", agent_state.position, "Rotation:", agent_state.rotation)
#             print("From Habitat to Camera:")
#             print(to_cam)
#             print("From Camera Matrix to Habitat:")
#             print(to_habitat)


# Reuse the QApplication if this cell has already been run in this kernel;
# constructing a second one in the same process is not supported by Qt.
app = QApplication.instance() or QApplication([])

window = MainWindow()

window.show()
# Blocks until the window is closed; the exit code becomes the cell output.
app.exec_()

0

# Take Specified Actions

In [None]:
from IPython.display import clear_output
action_names = list(
    cfg.agents[
        sim_settings["default_agent"]
    ].action_space.keys()
)

# Character code of the typed key -> action name. Letters are accepted in both
# upper and lower case.
action_map = {
    49: "turn_left",      # "1"
    50: "turn_right",     # "2"
    51: "look_up",        # "3"
    52: "look_down",      # "4"
    **dict.fromkeys((87, 119), "move_forward"),   # "W" / "w"
    **dict.fromkeys((65, 97), "move_left"),       # "A" / "a"
    **dict.fromkeys((83, 115), "move_backward"),  # "S" / "s"
    **dict.fromkeys((68, 100), "move_right")      # "D" / "d"
}

run = True

# Prompt for one character at a time, apply the matching action and plot the
# resulting observations. Any error ends the loop: an unmapped key (KeyError)
# or an empty / multi-character entry (TypeError from ord) is printed and then
# stops the session.
while run:
    try:
        button = ord(input("Press a key [W/A/S/D/1/2/3/4]: "))
        # Refresh keys re-render the current view without stepping the agent.
        # The original tuple was (69, 99), i.e. "E" and "c": two different
        # letters in mismatched cases. Both letters are now accepted in both
        # cases, which is a superset of the original behavior.
        if button not in (67, 99, 69, 101):
            #clear_output(): flush output
            action = action_map[button]
            print("Action", action)
            observations = sim.step(action)
        else:
            observations = sim.get_sensor_observations()
        
        rgb = observations["color_sensor"]
        semantic = observations["semantic_sensor"]
        depth = observations["depth_sensor"]

        display_sample(rgb, semantic, depth)
        
#         agent_state = agent.get_state()
#         to_cam, to_habitat = get_camera_matrices(agent_state.position, agent_state.rotation)
#         print("Agent state, Position:", agent_state.position, "Rotation:", agent_state.rotation)
#         print("From Habitat to Camera:")
#         print(to_cam)
#         print("From Camera Matrix to Habitat:")
#         print(to_habitat)

    except Exception as e:
        print(e)
        run = False

# Take Random Actions and Display Sensor Data

In [None]:
# Names of every action registered for the default agent.
action_names = list(cfg.agents[sim_settings["default_agent"]].action_space)
print(action_names)

max_frames = 5  # number of random steps to take and display
total_frames = 0

# Take one randomly chosen action per iteration and plot what the sensors see.
while total_frames < max_frames:
    action = random.choice(action_names)
    print("action", action)
    observations = sim.step(action)

    rgb, semantic, depth = (
        observations[sensor_uuid]
        for sensor_uuid in ("color_sensor", "semantic_sensor", "depth_sensor")
    )
    display_sample(rgb, semantic, depth)

    total_frames += 1