In [1]:
from chat_with_nerf.chat.agent import Agent 
import os
import json
import numpy as np
from chat_with_nerf.chat.session import Session

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
[32;20m[2023-09-08 15:45:58,575] INFO torch.distributed.nn.jit.instantiator [<module>] [instantiator.py:21] - Created a temporary directory at /tmp/tmp5xgj7ho5[0m
[32;20m[2023-09-08 15:45:58,576] INFO torch.distributed.nn.jit.instantiator [_write] [instantiator.py:76] - Writing /tmp/tmp5xgj7ho5/_remote_module_non_scriptable.py[0m


In [2]:
root_directory = '/workspace/chat-with-nerf-eval/data/scanrefer_val'  # Absolute path to the data root that get_val_set() walks; adjust for your environment

## Useful functions

In [None]:
def get_box3d_min_max(corner):
    ''' Compute the per-axis min and max coordinates of a 3D bounding box.
        Note: only meaningful for axis-aligned bounding boxes; any other box
        is reduced to the axis-aligned extent of its corner points.

    Input:
        corner: array-like of shape (8, 3) holding the box corners as
            (x, y, z) rows, or a batch of boxes of shape (..., 8, 3)
    Output:
        x_min, x_max, y_min, y_max, z_min, z_max: the six extents. Scalars
            for a single (8, 3) box; arrays with the leading batch shape
            for batched input.

    '''

    # Accept plain nested lists as well as numpy arrays.
    corner = np.asarray(corner)

    # Reduce over the corner axis (second to last). For a single (8, 3) box
    # this is the same as axis=0; for a batch it keeps each box separate
    # instead of collapsing the batch dimension.
    min_coord = np.moveaxis(corner.min(axis=-2), -1, 0)
    max_coord = np.moveaxis(corner.max(axis=-2), -1, 0)

    # After moveaxis the coordinate axis comes first, so integer indexing
    # yields scalars for a single box and (batch,) arrays otherwise.
    x_min, x_max = min_coord[0], max_coord[0]
    y_min, y_max = min_coord[1], max_coord[1]
    z_min, z_max = min_coord[2], max_coord[2]

    return x_min, x_max, y_min, y_max, z_min, z_max

In [None]:
def box3d_iou(corners1, corners2):
    ''' Compute the IoU of two 3D bounding boxes from their corner points.

    Each box is reduced to its per-axis min/max extent through
    get_box3d_min_max, so the result is the IoU of the axis-aligned
    extents of the two corner sets.

    Input:
        corners1: numpy array (8,3) with the corners of the first box
        corners2: numpy array (8,3) with the corners of the second box
    Output:
        iou: intersection volume divided by union volume

    '''
    x_lo_a, x_hi_a, y_lo_a, y_hi_a, z_lo_a, z_hi_a = get_box3d_min_max(corners1)
    x_lo_b, x_hi_b, y_lo_b, y_hi_b, z_lo_b, z_hi_b = get_box3d_min_max(corners2)

    # Overlap along each axis: smallest upper bound minus largest lower
    # bound, clamped at zero when the boxes do not overlap on that axis.
    overlap_x = np.maximum(np.minimum(x_hi_a, x_hi_b) - np.maximum(x_lo_a, x_lo_b), 0)
    overlap_y = np.maximum(np.minimum(y_hi_a, y_hi_b) - np.maximum(y_lo_a, y_lo_b), 0)
    overlap_z = np.maximum(np.minimum(z_hi_a, z_hi_b) - np.maximum(z_lo_a, z_lo_b), 0)
    inter_vol = overlap_x * overlap_y * overlap_z

    vol_a = (x_hi_a - x_lo_a) * (y_hi_a - y_lo_a) * (z_hi_a - z_lo_a)
    vol_b = (x_hi_b - x_lo_b) * (y_hi_b - y_lo_b) * (z_hi_b - z_lo_b)

    # The small epsilon keeps the division defined when the union is zero.
    return inter_vol / (vol_a + vol_b - inter_vol + 1e-8)

### Data Analysis

In [None]:
def get_val_set(root=None):
    ''' Map each folder under the data root to one JSON file inside it.

    Input:
        root: directory to walk; defaults to the module-level
            root_directory when omitted
    Output:
        json_dict: dict from folder base name to the path of the first
            ``.json`` file (in sorted name order) found directly in that
            folder. Folders without any JSON file are left out.

    '''
    if root is None:
        root = root_directory

    json_dict = {}
    for dir_path, _, filenames in os.walk(root):
        # Select among the JSON files only: the first entry of `filenames`
        # may be some other file, and os.walk gives no ordering guarantee,
        # so sort to make the choice deterministic.
        json_files = sorted(fn for fn in filenames if fn.endswith('.json'))
        if json_files:
            json_dict[os.path.basename(dir_path)] = os.path.join(dir_path, json_files[0])

    print(json_dict)
    return json_dict

In [None]:
# Build the folder-name -> JSON-path lookup that later cells index by scene name.
json_dict = get_val_set()

In [5]:
# Inspect the first annotated object of one scene and run the no-GPT
# grounding call on its first description only.
scene_name = 'scene0025_00'
scene_path = json_dict[scene_name]
agent = Agent()
print(scene_name)
print(scene_path)
with open(scene_path, 'r') as file:
    data = json.load(file)

# Every lookup below targets the same entry, so fetch it once.
first_object = data['objects'][0]
print("label: ", first_object['label'])
print("bbox: ", first_object['bbox'])
print("object_id: ", first_object['object_ids'])
new_session = Session.create_for_scene(scene_name)
print("description: ", first_object['description'])
for description in first_object['description']:
    print(description)
    result = agent.act_no_gpt(description, scene_name, new_session)
    print(result)
    # Stop after the first description.
    break

[32;20m[2023-09-07 04:25:36,336] INFO chat_with_nerf [initialize_model_no_gpt_context] [model_context.py:54] - Search for all Scenes and Set the current Scene[0m
[32;20m[2023-09-07 04:25:36,338] INFO chat_with_nerf [search_scenes] [model_context.py:89] - scene_path: /workspace/chat-with-nerf-dev/chat-with-nerf/data/scene0025_00/scene0025_00.yaml[0m
[32;20m[2023-09-07 04:25:36,343] INFO root [create_model] [factory.py:154] - Loaded ViT-B-16 model config.[0m
[32;20m[2023-09-07 04:25:37,659] INFO root [create_model] [factory.py:227] - Loading pretrained ViT-B-16 weights (laion2b_s34b_b88k).[0m
scene0025_00
/workspace/chat-with-nerf-eval/data/scanrefer_val/scene0025_00/72afcc45-a8b4-48b6-8224-783ad1d1ca95.json
label:  monitor
bbox:  [-0.30863550305366516, -1.6108747720718384, 0.9761558771133423, 0.4441679120063782, 0.42981481552124023, 0.5227721333503723]
object_id:  1
[32;20m[2023-09-07 04:25:50,828] INFO chat_with_nerf [create_for_scene] [session.py:42] - Creating a new session 

In [2]:
# Run the no-GPT grounding call on the 'home_1' scene with a fixed text query.
scene_name = 'home_1'
# NOTE(review): scene_path is assigned here but not read anywhere in this cell.
scene_path = '/workspace/chat-with-nerf-dev/chat-with-nerf/data/home_1'
agent = Agent()
new_session = Session.create_for_scene(scene_name)
# NOTE(review): the saved output of this cell shows an exception raised in a
# background thread (ground_no_gpt_with_callback) -- confirm `result` is usable.
result = agent.act_no_gpt(
    "computer screen",
    scene_name,
    new_session,
)

[32;20m[2023-09-07 15:28:11,125] INFO chat_with_nerf [initialize_model_no_gpt_context] [model_context.py:54] - Search for all Scenes and Set the current Scene[0m
[32;20m[2023-09-07 15:28:22,938] INFO chat_with_nerf [search_scenes] [model_context.py:89] - scene_path: /workspace/chat-with-nerf-dev/chat-with-nerf/data/scene0025_00/scene0025_00.yaml[0m
[32;20m[2023-09-07 15:28:22,942] INFO chat_with_nerf [search_scenes] [model_context.py:89] - scene_path: /workspace/chat-with-nerf-dev/chat-with-nerf/data/home_1/home_1.yaml[0m
[32;20m[2023-09-07 15:28:27,367] INFO root [create_model] [factory.py:154] - Loaded ViT-B-16 model config.[0m
[32;20m[2023-09-07 15:28:28,706] INFO root [create_model] [factory.py:227] - Loading pretrained ViT-B-16 weights (laion2b_s34b_b88k).[0m
[32;20m[2023-09-07 15:28:58,090] INFO chat_with_nerf [create_for_scene] [session.py:42] - Creating a new session bc7da60c-19e3-43fe-9e77-ae350ecb1d19 with scene home_1.[0m
[38;20m[2023-09-07 15:29:02,947] DEBUG c

Exception in thread Thread-10 (ground_no_gpt_with_callback):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/workspace/chat-with-nerf-dev/chat-with-nerf/chat_with_nerf/chat/grounder.py", line 83, in ground_no_gpt_with_callback
    bbox = grond_no_gpt(session, ground_text, picture_taker)
  File "/workspace/chat-with-nerf-dev/chat-with-nerf/chat_with_nerf/chat/grounder.py", line 71, in grond_no_gpt
    bbox = VisualGrounder.call_visual_grounder_no_gpt(
  File "/workspace/chat-with-nerf-dev/chat-with-nerf/chat_with_nerf/visual_grounder/visual_grounder.py", line 55, in call_visual_grounder_no_gpt
    bbox = picture_taker.visual_ground_pipeline_no_gpt(
  File "/workspace/chat-with-nerf-dev/chat-with-nerf/chat_with_nerf/visual_grounder/picture_taker.py", line 121, in visual_ground_pipeline_no_gpt
    cen

In [None]:
# Sketch of the full evaluation loop: for every scene, rebuild the model
# context while keeping the captioner, then run agent.act() per text query.
#
# NOTE(review): `scenes`, `ModelContextManager` and `get_text_queries` are not
# defined or imported in the cells shown here -- they must be provided before
# this cell can run.
for scene_id in scenes:
    # swap to a new scene, reusing the captioner from the current context
    llava_to_preserve = agent.model_context.captioner
    agent.new_model_context = ModelContextManager.intialize_with_give_captioner(
        llava_to_preserve
    )

    queries = get_text_queries(scene_id)
    # Intended to be parallelised with joblib; runs sequentially as written.
    # NOTE(review): `query` is never passed to agent.act(); `inputs` is a
    # hard-coded placeholder string.
    for query in queries:
        new_session = Session.create_for_scene(scene_id)
        # NOTE(review): this loop has no exit condition yet; a stop criterion
        # for the simulated conversation still has to be added.
        while True:
            (
                chat_history_for_display,
                chat_counter,
                server_status_code,
                session_state,
                model_3d_grounding_result,
            ) = agent.act(
                system_msg="Hello, I am a chatbot",
                inputs="new text from user simulator",
                top_p=0.9,
                temperature=0.1,
                dropdown_scene=scene_id,
                session=new_session,
            )  # act() only returns if the control is given back to the user

    # TODO: wait for all queries for this scene to finish, e.g. executor.wait().
    # The original line here was a dangling `in executor.wait()` fragment that
    # did not parse, and no `executor` is created in the cells shown here.