# In [1]:
import os
import pandas as pd
import random
import re
import math
import json

# In [2]:
_QUERY_FLAGS = re.IGNORECASE | re.DOTALL

# Supported phrasings, tried in this order; the first one that matches wins.
# Every value is matched as ``\d+\.\d+``, so a bare integer such as "10" is
# not recognised as a parameter value.
_QUERY_PATTERNS = (
    # Sentence style: "... meters long, ... meters wide, ... meters high"
    re.compile(
        r"(\d+\.\d+)\s*meters long.*?"
        r"(\d+\.\d+)\s*meters wide.*?"
        r"(\d+\.\d+)\s*meters high.*?"
        r"window-to-wall ratio.*?(\d+\.\d+).*?"
        r"occupancy.*?(\d+\.\d+).*?"
        r"lighting.*?(\d+\.\d+).*?"
        r"infiltration.*?(\d+\.\d+)",
        _QUERY_FLAGS,
    ),
    # Assignment style: "Length = ... m, Width = ... m, Height = ... m"
    re.compile(
        r"Length\s*=\s*(\d+\.\d+)\s*m.*?"
        r"Width\s*=\s*(\d+\.\d+)\s*m.*?"
        r"Height\s*=\s*(\d+\.\d+)\s*m.*?"
        r"window-to-wall ratio.*?(\d+\.\d+).*?"
        r"occupancy.*?(\d+\.\d+).*?"
        r"lighting.*?(\d+\.\d+).*?"
        r"infiltration.*?(\d+\.\d+)",
        _QUERY_FLAGS,
    ),
    # Bullet style: "- Length: ... meters"
    re.compile(
        r"Length[:=]?\s*(\d+\.\d+)\s*meters?.*?"
        r"Width[:=]?\s*(\d+\.\d+)\s*meters?.*?"
        r"Height[:=]?\s*(\d+\.\d+)\s*meters?.*?"
        r"window-to-wall ratio.*?(\d+\.\d+).*?"
        r"(?:occupancy.*?density.*?|accommodate.*?density.*?)(\d+\.\d+).*?"
        r"(?:lighting.*?|indoor lighting.*?)(\d+\.\d+).*?"
        r"infiltration.*?(\d+\.\d+)",
        _QUERY_FLAGS,
    ),
)


def parse_user_query(user_query: str) -> tuple:
    """Extract the seven building parameters from a free-text query.

    The query is tested against each supported phrasing in turn (sentence
    style, ``Length = ...`` style, bullet style). Matching is case-insensitive
    and may span multiple lines.

    Args:
        user_query: Text describing the building.

    Returns:
        A 7-tuple of floats in the order
        ``(L, W, H, WWR, occ_rate, lighting, infiltration)``.

    Raises:
        ValueError: If none of the supported phrasings matches the query.
    """
    for pattern in _QUERY_PATTERNS:
        match = pattern.search(user_query)
        if match:
            return tuple(map(float, match.groups()))

    # A descriptive message matters here: callers embed it in their output
    # (e.g. "Parsing failed: ..."), where a blank message is useless.
    raise ValueError(
        "could not extract building parameters (length, width, height, "
        "window-to-wall ratio, occupancy, lighting, infiltration) from the "
        "user query; expected decimal values in one of the supported phrasings"
    )

def build_reasoning(user_query: str):
    """Render a textual, step-by-step geometry derivation for a building query.

    The parameters come from ``parse_user_query``; only length, width, height
    and window-to-wall ratio are used in the generated text. Each window is
    sized by ``sqrt(WWR)`` along both wall dimensions and centred on its wall.

    Args:
        user_query: Text describing the building.

    Returns:
        The explanation as a single string.

    Raises:
        Whatever ``parse_user_query`` raises when the query cannot be parsed.
    """
    length, width, height, wwr, _occ, _light, _infil = parse_user_query(user_query)
    scale = math.sqrt(wwr)

    # Window extents: north/south walls span the length, east/west the width.
    # All four walls share the same height, hence a single window height.
    north_w = scale * length
    east_w = scale * width
    win_h = scale * height

    # Values that appear many times in the text are formatted once.
    len_s = f"{length:.2f}"
    wid_s = f"{width:.2f}"
    hgt_s = f"{height:.2f}"
    z_lo = f"{(height - win_h) / 2:.2f}"
    z_hi = f"{(height + win_h) / 2:.2f}"

    overview = (
        f"The building has length L = {len_s} m, width W = {wid_s} m, height H = {hgt_s} m, "
        f"and a window-to-wall ratio WWR = {wwr:.2f}. "
        f"The volume of the zone will be L*W*H = {len_s}*{wid_s}*{hgt_s} = {length * width * height:.2f} m³. "
        f"The window scale factor is sqrt(WWR) = {scale:.4f}, which is used to compute window width and height while preserving wall aspect ratio. "
    )

    floor_and_roof = (
        f"\n\nThe floor lies at z=0 with coordinates (L,W,z)=({len_s},{wid_s},0), (L,0,z)=({len_s},0,0), (0,0,z)=(0,0,0) and (0,W,z)=(0,{wid_s},0). "
        f"The roof lies at z={hgt_s} with coordinates (0,W,z)=(0,{wid_s},{hgt_s}), (0,0,z)=(0,0,{hgt_s}), (L,0,z)=({len_s},0,{hgt_s}), and (L,W,z)=({len_s},{wid_s},{hgt_s}). "
    )

    walls = (
        f"\n\nThe north wall is at y={wid_s} with corners (L,W,H)=({len_s},{wid_s},{hgt_s}), (L,W,0), (0,W,0), (0,W,H). "
        f"The east wall is at x={len_s} with corners (L,0,H), (L,0,0), (L,W,0), (L,W,H). "
        "The south wall is at y=0 with corners (0,0,H), (0,0,0), (L,0,0), (L,0,H). "
        "The west wall is at x=0 with corners (0,W,H), (0,W,0), (0,0,0), (0,0,H). "
    )

    windows = (
        "\n\nEach wall has a centered window: "
        f"\n- North wall have length of {len_s} and height of {hgt_s}, window width = k*length = {north_w:.2f}, window height = k*height = {win_h:.2f}. "
        f"The window is centered, with x ranging from (L-k*length)/2={(length - north_w) / 2:.2f} to (L+k*length)/2={(length + north_w) / 2:.2f}, and z ranging from (H-k*height)/2={z_lo} to (H+k*height)/2={z_hi}. "
        f"\n- East wall have width of {wid_s} and height of {hgt_s}, window width = k*width = {east_w:.2f}, window height = k*height = {win_h:.2f}. "
        f"The window is centered, with y ranging from (W-k*width)/2={(width - east_w) / 2:.2f} to (W+k*width)/2={(width + east_w) / 2:.2f}, and z ranging from (H-k*height)/2={z_lo} to (H+k*height)/2={z_hi}. "
        "\n- South wall is identical in dimensions to north, window coordinates mirror accordingly at y=0. "
        "\n- West wall is identical in dimensions to east, window coordinates mirror accordingly at x=0. "
    )

    # Symbolic summary only: the names below are literal text, not values.
    summary = (
        "\n\nThe geometry dictionary returned is: "
        "{'floor': floor, 'roof': roof, 'walls': {'Wall 000:a': wall_N, 'Wall 090:a': wall_E, 'Wall 180:a': wall_S, 'Wall 270:a': wall_W}, "
        "'windows': {'Wall 000:a - Sub:a': win_N, 'Wall 090:a - Sub:a': win_E, 'Wall 180:a - Sub:a': win_S, 'Wall 270:a - Sub:a': win_W}}."
    )

    return "".join((overview, floor_and_roof, walls, windows, summary))


# In [3]:
def add_reasoning_to_json(input_file: str, output_file: str, limit: int = None):
    """Add a ``"reasoning"`` field to each record of a JSON dataset.

    The input file must contain a JSON list of objects. For every record the
    value under ``"user"`` is passed to ``build_reasoning`` and the result is
    stored under ``"reasoning"``. A record that cannot be processed gets the
    text ``"Parsing failed: <error>"`` instead, so one bad record does not
    abort the whole run.

    Args:
        input_file: Path of the JSON dataset to read.
        output_file: Path the augmented dataset is written to.
        limit: Keep only the first ``limit`` records (list-slice semantics).
            ``None`` keeps every record; ``0`` yields an empty dataset.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default
    # locale encoding.
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Compare against None rather than truthiness: limit=0 is a valid request
    # for zero records, not a request for all of them.
    if limit is not None:
        data = data[:limit]

    for item in data:
        try:
            item["reasoning"] = build_reasoning(item["user"])
        except Exception as e:
            # Deliberate best-effort: record the failure and keep going.
            item["reasoning"] = f"Parsing failed: {e}"

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


# === Run example ===
# Augments the first 4500 records of the full dataset when executed directly.
if __name__ == "__main__":
    add_reasoning_to_json(
        input_file="BuildingGeomFull.json",
        output_file="BuildingGeomFull-reason.json",
        limit=4500,
    )