### Record LLM parser information locally

In [1]:
import time
import json
import openai
import numpy as np
import os
from pathlib import Path
from joblib import Parallel, delayed

In [2]:
# System prompt sent to the chat model: it asks for a free-form object
# description to be turned into JSON with a "target" entry plus numbered
# "spatial object N" entries, parseable by json.loads().
# NOTE(review): the guidelines below say the target should only carry
# color/size/texture, yet the sample output also gives the target a "name"
# key - confirm which of the two is intended.
PARSING_PROMT = """
You are an agent responsible for converting object descriptions into structured JSON data. Users will describe objects in a room, highlighting specific properties and their relations. Follow these guidelines:

1. Convert user descriptions into a JSON-compatible format.
2. Spot the primary object, termed the "target", and register its attributes. Insert these details under the "target" key.
3. Detect secondary objects described in connection with the target. Label these sequentially as "spatial object 1", "spatial object 2", and so forth, detailing their attributes.

Focus on these Properties:

- Name
- Color
- Size
- Texture
- Relation to target (if not the target itself)

Do not include quantity for target because the target is unique. For target object, only include color, size, texture. For spatial objects, include all properties.

Sample Interaction:
Input: "I envision a white, glossy cabinet. To its left, there's a small, brown, wooden table. On its right, a slightly smaller, matte table."

Expected Output:
{
    "target": {
        "name": "cabinet",
        "color": "white",
        "texture": "glossy"
    },
    "spatial object 1": {
        "name": "table",
        "color": "brown",
        "size": "small",
        "texture": "wooden",
        "relation to target": "left"
    },
    "spatial object 2": {
        "name": "table",
        "size": "smaller",
        "texture": "matte",
        "relation to target": "right"
    }
}

Please ensure the output can be used directly with Python's json.loads() function. If any property or object detail is absent, omit the relevant key.
"""

In [3]:
def ask_gpt_txt(
    system_prompt: str,
    user_input: str,
    gpt_version: str
):
    """Query an Azure OpenAI chat deployment and parse its reply as JSON.

    The completion is requested in streaming mode; the streamed text fragments
    are concatenated and the full message is decoded with ``json.loads``. The
    request is retried, with a quadratically growing delay, until a reply
    decodes to a non-``None`` value or 10 attempts have been made.

    The API key is read from the ``AZURE_OPENAI_API_KEY`` environment variable
    (falling back to ``OPENAI_API_KEY``) so that no credential is stored in the
    notebook.

    Args:
        system_prompt (str): Content of the system message.
        user_input (str): Content of the user message.
        gpt_version (str): Value passed as ``engine`` to the API (the
            deployment to call, e.g. "chat-with-nerf-gpt4").

    Returns:
        The object decoded from the model's reply, or ``None`` if no attempt
        produced valid JSON.

    Raises:
        RuntimeError: If no API key is configured in the environment.
    """
    # Fail fast, before any delay or network traffic, when no key is configured.
    api_key = os.environ.get("AZURE_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "No API key configured: set the AZURE_OPENAI_API_KEY (or OPENAI_API_KEY) "
            "environment variable before calling ask_gpt_txt()."
        )

    openai.api_type = "azure"
    openai.api_version = "2023-07-01-preview"
    openai.api_base = "https://jed-chat-with-nerf.openai.azure.com/"
    openai.api_key = api_key

    max_attempts = 10
    result = None
    for attempt in range(1, max_attempts + 1):
        # Quadratic delay before every attempt (1s, 4s, 9s, ...); this also
        # spaces out the requests issued by parallel workers.
        time.sleep(attempt**2)

        try:
            response = openai.ChatCompletion.create(
                engine=gpt_version,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_input},
                ],
                temperature=0.7,
                max_tokens=2000,
                top_p=0.95,
                frequency_penalty=0,
                presence_penalty=0,
                stop=None,
                stream=True,
            )
        except Exception as exp:
            # Request-level failure (the captured output shows rate-limit
            # errors here): wait once more on top of the pre-attempt delay.
            print(f"OpenAI Response Error: {exp}")
            time.sleep(attempt**2)
            continue

        # Reassemble the streamed reply from its text fragments.
        pure_msg = ""
        try:
            for chunk in response:
                choices = chunk["choices"]
                if len(choices) > 0:
                    delta = choices[0]["delta"]
                    # Only deltas that consist of nothing but a text fragment
                    # contribute to the message.
                    if len(delta) == 1 and "content" in delta:
                        pure_msg += delta["content"]
                # NOTE(review): this per-chunk pause only slows down reading
                # the stream; kept to preserve the existing request pacing.
                time.sleep(0.1)
        except Exception as exp:
            # A broken stream leaves a partial message; the parse step below
            # will normally reject it and trigger another attempt.
            print("OpenAI Response (Streaming) Error: " + str(exp))

        try:
            print(pure_msg)
            result = json.loads(pure_msg)
        except Exception as exp:
            print("OpenAI Response (JSON) Error: " + str(exp))

        if result is not None:
            break

    return result

In [4]:
def convert_origin_bbox(bbox, axisAlignment_matrix):
    """Map an axis-aligned box back into the original (un-aligned) frame.

    The eight corners of the box are built in the aligned frame, transformed
    with the inverse of ``axisAlignment_matrix``, and a new axis-aligned box
    enclosing the transformed corners is derived from them.

    Args:
        bbox: Six numbers ``[cx, cy, cz, ex, ey, ez]``: the box centre followed
            by its full extent (edge length) along each axis, in the aligned
            frame.
        axisAlignment_matrix: 4x4 homogeneous transform; its inverse is applied
            to the corners.

    Returns:
        tuple: ``(corners_original, center_original, extents_original)`` where
        ``corners_original`` is a list of eight length-3 arrays (x sign varies
        fastest, then y, then z), ``center_original`` is the mean of those
        corners, and ``extents_original`` is a list with the max-min span of
        the corners along each axis.
    """
    box = np.asarray(bbox, dtype=float)
    center_aligned = box[:3]
    # Corners lie half an edge length away from the centre. Using the full
    # extent here would double the box (extents out == 2 * extents in even for
    # an identity transform).
    half_extents = box[3:6] / 2.0

    # Sign pattern of the 8 corners, ordered with x toggling fastest.
    signs = np.array(
        [(sx, sy, sz) for sz in (-1.0, 1.0) for sy in (-1.0, 1.0) for sx in (-1.0, 1.0)]
    )
    corners_aligned = center_aligned + signs * half_extents

    # Apply the inverse transform to all corners at once in homogeneous form.
    inverse_matrix = np.linalg.inv(axisAlignment_matrix)
    homogeneous = np.hstack([corners_aligned, np.ones((len(corners_aligned), 1))])
    corners = (homogeneous @ inverse_matrix.T)[:, :3]

    corners_original = list(corners)
    center_original = corners.mean(axis=0)
    extents_original = list(corners.max(axis=0) - corners.min(axis=0))

    return corners_original, center_original, extents_original

In [5]:
def get_transformation_matrix(meta_file):
    """Read the 4x4 ``axisAlignment`` matrix from a scene meta file.

    The file is scanned line by line; the first line containing
    ``axisAlignment`` is expected to carry 16 whitespace-separated numbers
    (row-major) after the keyword.

    Args:
        meta_file: Path (``str`` or ``Path``) of the text file to read.

    Returns:
        np.ndarray: The matrix with shape ``(4, 4)``.

    Raises:
        ValueError: If the file has no ``axisAlignment`` line, or the line does
            not hold exactly 16 parseable numbers.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(meta_file) as handle:
        for line in handle:
            if "axisAlignment" not in line:
                continue
            # Take the text after the keyword and drop the "=" explicitly.
            # str.strip("axisAlignment = ") would strip a *set of characters*
            # from both ends, not that prefix.
            payload = line.split("axisAlignment", 1)[1].replace("=", " ")
            # split() without arguments tolerates repeated or trailing blanks.
            values = [float(token) for token in payload.split()]
            return np.array(values).reshape((4, 4))

    raise ValueError(f"no 'axisAlignment' entry found in {meta_file}")

In [17]:
# Helper function to process each object
def process_object(object_dict):
    """Parse every description of one object with the LLM.

    Each entry of ``object_dict['description']`` is sent to ``ask_gpt_txt``
    together with ``PARSING_PROMT``; the results (in the same order) are stored
    under ``object_dict['parsed_description']``.

    Args:
        object_dict: Mapping with a ``'description'`` iterable of strings. It
            is modified in place.

    Returns:
        The same ``object_dict``, now carrying ``'parsed_description'``.
    """
    object_dict['parsed_description'] = [
        ask_gpt_txt(
            system_prompt=PARSING_PROMT,
            user_input=text,
            gpt_version="chat-with-nerf-gpt4",
        )
        for text in object_dict['description']
    ]
    return object_dict

def read_json_files_in_directory_get_json(
    directory,
    scans_root="/nfs/turbo/coe-chaijy/datasets/scannet/scans",
    n_jobs=-1,
):
    """Enrich every ``*.json`` file under ``directory`` and write the result.

    For each file found recursively, the function
      1. loads the JSON and reads the scene id from ``data['extra']['scannet_id']``,
      2. stores the axis-alignment matrix read from
         ``<scans_root>/<scene id>/<scene id>.txt`` under
         ``data['axisAlignment_matrix']`` (as nested lists),
      3. replaces ``data['objects']`` with the output of ``process_object``
         for each object, run through ``joblib.Parallel``,
      4. writes the result to ``<scene id>.json`` next to the input file.

    Args:
        directory: Root directory (``str`` or ``Path``) searched recursively.
        scans_root: Directory holding one sub-directory per scene with its
            ``<scene id>.txt`` meta file. Defaults to the location previously
            hard-coded in this function.
        n_jobs: Forwarded to ``joblib.Parallel``; ``-1`` (the default) keeps
            the previous behaviour.

    Returns:
        None. Results are written to disk.
    """
    directory_path = Path(directory)
    scans_root_path = Path(scans_root)

    # Snapshot (and order) the matches up front: outputs are written into the
    # very tree being searched, so the walk must not observe them.
    for json_path in sorted(directory_path.rglob('*.json')):
        # Only hold the input open while reading it, not during the
        # long-running API calls below.
        with json_path.open('r') as in_file:
            data = json.load(in_file)

        scannet_name = data['extra']['scannet_id']
        meta_path = scans_root_path / scannet_name / (scannet_name + ".txt")
        axisAlignment_matrix = get_transformation_matrix(meta_path)
        data['axisAlignment_matrix'] = axisAlignment_matrix.tolist()

        # Parallel processing of each object in data['objects']
        data['objects'] = Parallel(n_jobs=n_jobs)(
            delayed(process_object)(obj) for obj in data['objects']
        )

        # Distinct name for the output handle so the loop variable is not shadowed.
        output_path = json_path.parent / (scannet_name + ".json")
        with output_path.open('w') as out_file:
            json.dump(data, out_file, indent=4)

In [18]:
# Directory that is searched recursively for the *.json files to enrich;
# replace with the path to your own data directory. Running this cell issues
# one LLM request per object description and writes <scannet_id>.json files
# next to the inputs.
root_directory = "/home/xuweic/chat-with-nerf-16/chat-with-nerf-eval/data/scanrefer_val"
read_json_files_in_directory_get_json(root_directory)

{
    "target": {
        "name": "shelf",
        "color": "blue"
    }
}
{
    "target": {
        "name": "armchair",
        "color": "blue"
    }
}
OpenAI Response Error: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-07-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 6 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.
OpenAI Response Error: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-07-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 5 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.
{
    "target": {
        "name": "table"
    },
    "spatial object 1": {
        "name": "couch",
        "color": "green",
        "relation to target": "by"
    }
}
{
    "targe

In [None]:
# Ad-hoc check: parse one hand-written description with the same prompt and
# engine name that process_object() uses. `result` is the decoded JSON, or
# None if every attempt failed.
result = ask_gpt_txt(
    system_prompt=PARSING_PROMT,
    user_input="there are six brown arm chairs stacked against the white wall. they are next to two black chairs and a black couch. it appears  to be leather. it is the brown chair on the top row in the middle.",
    gpt_version="chat-with-nerf-gpt4"
)