In [2]:
# Extend the import search path with the project's `source` directory before the
# project-local `utils.*` imports in the next cell are executed.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# import `utils` on another machine unless this is adjusted (consider deriving it
# from an environment variable or a path relative to the notebook).
import sys
sys.path.append('/home/olemke/workspace/code/spot-compose-v2/source')

In [3]:
import os

import numpy as np
import pprint
import torch

import open3d as o3d
from utils.recursive_config import Config
from utils.docker_interfaces.openmask_interface import get_mask_points, get_scene_dict


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [21]:
# Project configuration object; indexed and passed to the openmask helpers below.
config = Config()

# NOTE(review): INTERMEDIATE_VISUALIZATIONS and ITEM are not read by any cell
# visible in this notebook — confirm whether they are still needed.
INTERMEDIATE_VISUALIZATIONS = True
ITEM = "alarm clock"
# Passed to both get_scene_dict and get_mask_points as the mask-confidence threshold.
MIN_MASK_CONFIDENCE = 0.3
# Passed to get_mask_points as the CLIP-similarity threshold.
MIN_CLIP_SIMILARITY = 0.22

In [22]:
def round_floats(obj, decimals=2):
    """Return a copy of ``obj`` with every float rounded to ``decimals`` places.

    Dicts, lists and tuples are walked recursively and rebuilt as plain
    ``dict`` / ``list`` / ``tuple`` objects (dict keys are left untouched).
    Any other value, including ints and strings, is returned unchanged.
    """
    if isinstance(obj, float):
        return round(obj, decimals)

    if isinstance(obj, dict):
        rounded = {}
        for key, value in obj.items():
            rounded[key] = round_floats(value, decimals)
        return rounded

    if isinstance(obj, list):
        return [round_floats(entry, decimals) for entry in obj]

    if isinstance(obj, tuple):
        return tuple(round_floats(entry, decimals) for entry in obj)

    # Not a float and not a supported container: pass through as-is.
    return obj

**Task:**

You will be provided with the name of an item (after the keyword **[ITEM]**) that needs to be found within an indoor scene (e.g., **"[ITEM] water bottle"**). Your task is to generate a list of large objects or areas (such as tables, chairs, beds, etc.) where one might look for the specified item. Consider locations where the item is commonly placed, stored, or might logically be found.

**Guidelines:**

- Focus on common indoor locations relevant to the item.
- Think about typical use-cases and where the item is usually kept.
- Include various rooms or areas if applicable (e.g., kitchen, living room, bedroom).
- Ensure that the suggestions are practical and make sense spatially.
- Include spatial relations such as "on top of", "underneath", "inside of", or "around" relative to specific objects.
- Do not list objects that are very similar to each other (e.g., shelf, drawer, and cabinet) more than once, unless they have different spatial relations.
- For now, focus only on an office setting (i.e., the only room is an office).

**Format:**

- Output your answer as a Python list of tuples of strings. In each tuple, the first string specifies the object and the second string specifies a spatial relation to that object.
- The list should be formatted exactly as shown below, including quotation marks and commas.
- The items should be ordered from most likely to least likely.
- Output at least 3 and no more than 5 suggestions.

Example format:

```python
[("Location 1", "Spatial Relation 1"), ("Location 2", "Spatial Relation 2"), ("Location 3", "Spatial Relation 3"), ...]
```

**Example:**

If the item is **[ITEM] water bottle**, your output could look like:

```python
[("desk", "on top of"), ("chair", "next to"), ("cabinet", "inside of"), ("desk", "underneath")]
```

**Instructions:**

- Do not include any additional text outside the Python list.
- Do not explain your reasoning.
- Ensure the list is relevant to the specified item.

In [23]:
# Candidate search locations as (object, spatial relation) pairs, in the list-of-tuples
# format requested by the prompt above. Only the object name is used when querying
# masks in the next cell; the spatial relation is ignored there.
answers = [("desk", "underneath"), ("trash bucket", "inside of"), ("filing cabinet", "next to"), ("door", "near"), ("printer station", "next to")]

In [24]:
# Load the aligned high-resolution scan; the masks returned by get_mask_points are
# used as boolean selectors over this cloud's points.
pcd_name = config["pre_scanned_graphs"]["high_res"]
pcd_path = os.path.join(
    config.get_subpath("aligned_point_clouds"), pcd_name, "scene.ply"
)
pcd = o3d.io.read_point_cloud(str(pcd_path))
pcd_points = np.array(pcd.points)

pp = pprint.PrettyPrinter(indent=4, width=200)

scene_dict = get_scene_dict(config, vis_block=False, min_mask_confidence=MIN_MASK_CONFIDENCE)
scene = scene_dict["scene"]
items_not_found = []

# The same object may appear in `answers` with several spatial relations; query each
# distinct object only once (first-seen order is preserved) so it is neither looked
# up repeatedly nor reported twice under "not found".
for item in dict.fromkeys(obj for obj, _ in answers):
    cos_sims, masks, object_ids, _mask_scores = get_mask_points(item, config, MIN_MASK_CONFIDENCE, MIN_CLIP_SIMILARITY)
    nr_objects = cos_sims.shape[0]
    if nr_objects == 0:
        items_not_found.append(item)
        continue
    for idx in range(nr_objects):
        object_id = object_ids[idx]
        confidence = cos_sims[idx].item()
        # Entries are stored under "obj_<id>", so the duplicate check must use the
        # same key; looking up the raw id never matches and would let a
        # lower-similarity description overwrite a better one.
        key = f"obj_{object_id}"
        existing = scene.get(key)
        if existing is not None and existing["confidence"] > confidence:
            continue
        # Otherwise add/replace the entry: axis-aligned bbox centre and half-sizes.
        mask = masks[idx].astype(bool)
        points = pcd_points[mask]
        if points.shape[0] == 0:
            # An empty mask has no bounding box (np.min would raise); skip it.
            continue
        bbox_min, bbox_max = np.min(points, axis=0), np.max(points, axis=0)
        centroid = (bbox_min + bbox_max) / 2
        extents = (bbox_max - bbox_min) / 2  # half-extent along each axis
        scene[key] = {
            "description": item,
            "confidence": confidence,
            "centroid": centroid.tolist(),
            "extents": extents.tolist(),
        }

# NOTE: "not found" is a plain list stored next to the per-object dicts; consumers
# iterating over scene_dict["scene"] must skip it.
scene["not found"] = items_not_found
pp.pprint(round_floats(scene_dict))

{   'scene': {   'not found': ['door', 'printer station'],
                 'obj_16': {'centroid': [2.29, 1.15, 0.07], 'confidence': 0.22, 'description': 'trash bucket', 'extents': [0.17, 0.16, 0.17]},
                 'obj_28': {'centroid': [0.33, 0.65, 0.01], 'confidence': 0.26, 'description': 'filing cabinet', 'extents': [0.17, 0.18, 0.15]},
                 'obj_3': {'centroid': [3.56, 0.43, 0.67], 'confidence': 0.25, 'description': 'filing cabinet', 'extents': [0.31, 0.45, 0.74]},
                 'obj_37': {'centroid': [1.19, 1.3, 0.25], 'confidence': 0.24, 'description': 'desk', 'extents': [0.88, 0.62, 0.37]},
                 'obj_5': {'centroid': [3.51, 1.11, 0.43], 'confidence': 0.22, 'description': 'filing cabinet', 'extents': [0.12, 0.1, 0.49]},
                 'object_11': {'centroid': [2.88, 1.05, 0.34], 'description': 'irrelevant', 'extents': [0.32, 0.32, 0.42]},
                 'object_16': {'centroid': [2.29, 1.15, 0.07], 'description': 'irrelevant', 'extents': [0.17

In [None]:
# Create a visualizer
vis = o3d.visualization.Visualizer()
vis.create_window()

# Draw the point cloud
pcd = o3d.io.read_point_cloud(str(pcd_path))
vis.add_geometry(pcd)

# Only entries that describe a box can be drawn. scene_dict["scene"] also holds the
# "not found" list, which has no centroid/extents and would otherwise raise here.
drawable_objects = {
    obj_id: obj_data
    for obj_id, obj_data in scene_dict["scene"].items()
    if isinstance(obj_data, dict) and "centroid" in obj_data and "extents" in obj_data
}

# Assign a random colour to each drawable object ID
object_colors = {obj_id: np.random.rand(3) for obj_id in drawable_objects}

for object_id, obj_data in drawable_objects.items():
    mask_color = object_colors[object_id]
    centroid = np.array(obj_data["centroid"])
    extents = np.array(obj_data["extents"])  # stored as half-sizes around the centroid

    # Axis-aligned bounding box spanning centroid +/- extents
    bbox = o3d.geometry.AxisAlignedBoundingBox(centroid - extents, centroid + extents)
    bbox.color = mask_color
    vis.add_geometry(bbox)

    # Use a small sphere, slightly offset from the centroid, as a label marker
    label_pos = centroid + np.array([0.05, 0.05, 0.05])
    label_sphere = o3d.geometry.TriangleMesh.create_sphere(radius=0.01)
    label_sphere.translate(label_pos)
    label_sphere.paint_uniform_color(mask_color)  # Colour same as bounding box
    vis.add_geometry(label_sphere)

    # Print the label for reference (no text is drawn in the window). Entries that
    # were not matched against a query carry no "confidence" value, so it is
    # included only when present.
    confidence = obj_data.get("confidence")
    if confidence is None:
        label_text = f"{obj_data['description']}"
    else:
        label_text = f"{obj_data['description']} ({confidence:.2f})"
    print(f"Label for object {object_id}: {label_text}")

# Run the visualization
vis.run()
vis.destroy_window()


In [25]:
# Hand-specified search regions, keyed by a running integer. Each entry holds a box
# centre ("centroid"), per-axis half-sizes ("extents", used as centroid +/- extents by
# the visualisation cell below), an (object, spatial relation) pair and a free-text
# rationale.
# Areas 2 and 3 reuse the centroid/extents printed above for obj_16 and obj_28; area 1
# keeps obj_37's x/y centre and x/y extents but uses z = 0.1 for both centre and extent.
# NOTE(review): the "reasoning" strings talk about trash, while ITEM in the config cell
# is "alarm clock", and area 2 says "trash can" where `answers` says "trash bucket" —
# confirm which item these regions were generated for.
search_areas = {
    1: {
        "centroid": (1.19, 1.3, 0.1),
        "extents": (0.88, 0.62, 0.1),
        "short description": ("desk", "underneath"),
        "reasoning": "The trash is often found underneath desks, where waste from daily work is commonly discarded."
    },
    2: {
        "centroid": (2.29, 1.15, 0.07),
        "extents": (0.17, 0.16, 0.17),
        "short description": ("trash can", "inside of"),
        "reasoning": "A trash bucket is a primary container for trash, making it a likely place for discarded items."
    },
    3: {
        "centroid": (0.33, 0.65, 0.01),
        "extents": (0.17, 0.18, 0.15),
        "short description": ("filing cabinet", "next to"),
        "reasoning": "Trash is commonly found near filing cabinets, as people may discard items after organizing or processing documents."
    }
}


In [26]:
# Open an Open3D window and seed it with the scene's point cloud.
vis = o3d.visualization.Visualizer()
vis.create_window()
pcd = o3d.io.read_point_cloud(str(pcd_path))
vis.add_geometry(pcd)

# One random RGB colour per search area, drawn in dictionary order.
object_colors = {}
for area_key in search_areas:
    object_colors[area_key] = np.random.rand(3)

# Fixed displacement of the marker sphere from each box centre.
label_offset = np.array([0.05, 0.05, 0.05])

for search_area_id, search_area_data in search_areas.items():
    mask_color = object_colors[search_area_id]
    centroid = np.array(search_area_data["centroid"])
    extents = np.array(search_area_data["extents"])

    # Box corners are centroid -/+ extents (extents act as half-sizes).
    bbox_min = centroid - extents
    bbox_max = centroid + extents
    bbox = o3d.geometry.AxisAlignedBoundingBox(bbox_min, bbox_max)
    bbox.color = mask_color
    vis.add_geometry(bbox)

    # Small sphere in the box colour, standing in for an on-screen label.
    label_pos = centroid + label_offset
    label_sphere = o3d.geometry.TriangleMesh.create_sphere(radius=0.01)
    label_sphere.translate(label_pos)
    label_sphere.paint_uniform_color(mask_color)
    vis.add_geometry(label_sphere)

    # The textual label goes to stdout rather than into the 3D window.
    label_text = str(search_area_data["short description"])
    print(f"Label for search area {search_area_id}: {label_text}")

# Block until the window is closed, then release it.
vis.run()
vis.destroy_window()

Label for search area 1: ('desk', 'underneath')
Label for search area 2: ('trash can', 'inside of')
Label for search area 3: ('filing cabinet', 'next to')
