### Importing necessary libraries

In [None]:
import os
import json
import pandas as pd
import random

### Function to extract EnergyPlus objects From IDF files

In [None]:
def extract_all_objects_clean_comments(idf_file_path):
    """Return every object in an IDF file as a one-line string with comments removed.

    The file is read as UTF-8. On each line, everything from the first '!'
    onward is dropped; lines that end up empty are ignored. The remaining
    fragments are collected until one ends with ';', then joined with single
    spaces into one object string. Fragments still pending at end of file
    (no closing ';') are returned as a final object.
    """
    objects = []
    pending = []

    with open(idf_file_path, 'r', encoding='utf-8') as handle:
        for raw in handle:
            # Keep only the text before the comment marker, without padding
            fragment = raw.split('!')[0].strip()
            if not fragment:
                continue

            pending.append(fragment)

            # A trailing ';' terminates the current object
            if fragment.endswith(';'):
                objects.append(" ".join(pending))
                pending = []

    # Unterminated trailing fragments are still emitted as one object
    if pending:
        objects.append(" ".join(pending))

    return objects

### Various prompts for creating the dataset with different queries

In [None]:
# Function to generate a user query
def generate_combined_query(description):
    """Return a randomly chosen user query asking for IDF objects.

    Args:
        description: Mapping with the keys 'L', 'W', 'H', 'FA', 'AR' and
            'WWR'. Each value must be convertible with ``float()``, so
            numbers and numeric strings are both accepted.

    Returns:
        One of the prompt templates below, picked with ``random.choice`` and
        filled in with the values formatted to two decimal places.

    Raises:
        KeyError: if one of the required keys is missing.
        ValueError: if a value cannot be converted to float.
    """
    # Convert once and format the converted values, so numeric strings
    # are formatted exactly like floats.
    length = float(description['L'])
    width = float(description['W'])
    height = float(description['H'])
    floor_area = float(description['FA'])
    aspect_ratio = float(description['AR'])
    wwr = float(description['WWR'])
    templates = [
        f"""
        Write an EnergyPlus IDF snippet for a single rectangular building.
        Assumptions:
        - Vertices start from (0.0, 0.0, 0.0) and proceed clockwise when viewed externally
        - Windows must be fully contained and centered on each wall
        - Use SI units only

        Geometry:
        - Length = {length:.2f} m, Width = {width:.2f} m, Height = {height:.2f} m
        - Floor Area = {floor_area:.2f} m², Aspect Ratio = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        Generate an EnergyPlus IDF snippet.
        Instructions:
        - Begin at (0.0, 0.0, 0.0); define vertices in clockwise order
        - Each wall has 4 vertices; window must be rectangular and centered
        - Use metric units

        Building Specs:
        - Length = {length:.2f} m, Width = {width:.2f} m, Height = {height:.2f} m
        - Floor Area = {floor_area:.2f}, AR = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        Create an IDF code snippet for a rectangular building.
        Geometry Rules:
        - Vertices for all surfaces must start at (0.0, 0.0, 0.0) and be ordered clockwise
        - Windows must be centered and not exceed the wall boundary
        - Use meters and SI units

        Building Specs:
        - Length = {length:.2f} m, Width = {width:.2f} m, Height = {height:.2f} m
        - Aspect Ratio = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        Define a complete EnergyPlus idf FILE.
        Surface Instructions:
        - Use clockwise order starting from the front-lower-left corner (0.0, 0.0, 0.0)
        - Each surface must include exactly 4 vertices
        - Place rectangular windows in the center of each wall

        Building Details:
        - Floor Area = {floor_area:.2f} m², L = {length:.2f} m, W = {width:.2f} m, H = {height:.2f} m
        - AR = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        Generate IDF input for a single-story rectangular room.
        Assumptions:
        - Use SI units and EnergyPlus syntax
        - Surfaces begin at (0,0,0) and follow clockwise vertex order

        Parameters:
        - Length: {length:.2f} m, Width: {width:.2f} m, Height: {height:.2f} m
        - Floor Area: {floor_area:.2f} m²
        - Aspect Ratio: {aspect_ratio:.2f}, WWR: {wwr:.2f}
        """,

        f"""
        Write an EnergyPlus IDF.
        Details:
        - Window area must conform to the WWR
        - All surface vertices must be in clockwise order starting from (0.0, 0.0, 0.0)
        - Surfaces must be unique

        Geometry:
        - L = {length:.2f}, W = {width:.2f}, H = {height:.2f}
        - FA = {floor_area:.2f}, AR = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        I need an EnergyPlus IDF block for a box-shaped building.
        Additional Instructions:
        - Walls must use clockwise vertex ordering
        - All surfaces and windows must be rectangular
        - Begin coordinates at (0.0, 0.0, 0.0)

        Use these geometry inputs:
        - Length = {length:.2f} m, Width = {width:.2f} m, Height = {height:.2f} m
        - Floor Area = {floor_area:.2f} m², AR = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        Create an EnergyPlus IDF snippet.
        Assumptions:
        - Clockwise vertex order
        - Base corner at (0.0, 0.0, 0.0)
        - SI units only

        Geometry Specs:
        - L = {length:.2f} m, W = {width:.2f} m, H = {height:.2f} m
        - FA = {floor_area:.2f} m², AR = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        Write IDF code for a room.
        Instructions:
        - Wall surface vertex order is clockwise starting from (0.0, 0.0, 0.0)
        - Windows must obey WWR and be centered

        Inputs:
        - Length = {length:.2f}, Width = {width:.2f}, Height = {height:.2f}
        - Floor Area = {floor_area:.2f}, AR = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        Create IDF geometry for a single rectangular space.
        Use these rules:
        - Surfaces start at (0, 0, 0), vertex order is clockwise
        - Windows must not overlap edges
        - All dimensions in meters

        Geometry:
        - L = {length:.2f}, W = {width:.2f}, H = {height:.2f},
        - FA = {floor_area:.2f}, AR = {aspect_ratio:.2f}, WWR = {wwr:.2f}
        """,

        f"""
        Generate an IDF code block.
        Instructions:
        - Base point is (0.0, 0.0, 0.0)
        - Use clockwise surface definition
        - Center all windows both horizontally and vertically on the wall

        Input:
        - Length = {length:.2f}, Width = {width:.2f}, Height = {height:.2f},
        - Floor Area = {floor_area:.2f}, AR = {aspect_ratio:.2f}, WWR = {wwr:.2f}

        """
        ]
    return random.choice(templates)

### Choose the file path

All the data for the design matrix on which the simulations are based are stored in an Excel-compatible CSV file. It contains L, W, CH, FA, WWR, AR, V and EUI for each design.
The IDF files from which the EnergyPlus objects are extracted are in the idf_rectangle folder.

In [None]:
# Input locations: the folder holding the IDF files and the design-matrix CSV inside it
idf_folder_path = r"C:\Users\Jayedi Aman\OneDrive - University of Missouri\Desktop\LLM\idf_rectangle"
excel_file_path = r"C:\Users\Jayedi Aman\OneDrive - University of Missouri\Desktop\LLM\idf_rectangle\data.csv"

# Output location of the generated dataset (relative to the working directory)
output_json_path = "BuildingGeomFull1.json"

### Creating dictionary 
Dictionary of data with key = ID in the design matrix, value = set of {L, W, CH, WWR, AR, FA, V, EUI}

In [None]:
# Stop early when the design-matrix CSV is missing
if not os.path.exists(excel_file_path):
    raise FileNotFoundError(f"CSV file not found: {excel_file_path}")

# Columns that are cast to float right after loading
float_columns = ['L', 'W', 'H', 'WWR', 'AR', 'EUI', 'FA']
df = pd.read_csv(excel_file_path).astype(dict.fromkeys(float_columns, float))

# One dict of column values per row, keyed by the 'ID' column
building_data = df.set_index('ID').to_dict(orient='index')

building_data

### Creating the dataset with Query and Answer

###

In [None]:
import random

# Seed the RNG so the prompt template chosen for each file is repeatable
random.seed(42)

# Query/answer pairs collected for the dataset
json_pairs = []

# os.listdir() returns entries in arbitrary order. Sorting keeps the sequence
# of files (and therefore the seeded template choice per file) the same on
# every run and platform.
for idf_file in sorted(os.listdir(idf_folder_path)):
    # Only files named like "in<ID>.idf" are part of the dataset
    if not (idf_file.endswith('.idf') and idf_file.startswith("in")):
        continue

    idf_file_path = os.path.join(idf_folder_path, idf_file)

    # Strip exactly the leading "in" and the trailing ".idf"; whatever is
    # left must be the integer building ID.
    try:
        building_id = int(idf_file[len("in"):-len(".idf")])
    except ValueError:
        print(f"Skipping {idf_file} - Invalid filename format.")
        continue

    description = building_data.get(building_id)
    if not description:
        print(f"Warning: No description found for {idf_file}")
        continue

    # Extract EnergyPlus objects
    all_objects = extract_all_objects_clean_comments(idf_file_path)

    print(f"Processing {idf_file}: TotalObjects({len(all_objects)})")

    if all_objects:
        json_pairs.append({
            "user": generate_combined_query(description),
            "assistant": "\n".join(all_objects)
        })
    else:
        print(f"Skipping {idf_file} - No IDF objects found.")

In [None]:
# Write the collected query/answer pairs to disk as indented JSON
with open(output_json_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json.dumps(json_pairs, indent=2))

print(f"Fine-tuning dataset saved to {output_json_path}")

### Cleaning the json file

In [None]:
## Round the numbers in the saved dataset to two decimal places
import json
import re

# Dataset file that is read, rounded and written back in the cells below
file_path = r"C:\Users\Jayedi Aman\OneDrive - University of Missouri\Desktop\LLM\BuildingGeomFull1.json"

def round_numbers_in_text(text, precision=2):
    """Rewrite every standalone number in ``text`` with ``precision`` decimals.

    A number is an optional '-', then digits with an optional fraction
    (``12``, ``12.5``) or a leading-dot fraction (``.5``), then an optional
    exponent (``1.5e-3``). The whole token is parsed as one float, so
    scientific notation is rounded as a single value instead of being split
    into two numbers. Digits that are part of an identifier (``Zone1``) or of
    a dotted sequence (``1.2.3``) are left untouched.

    Args:
        text: String to process.
        precision: Number of digits after the decimal point.

    Returns:
        A new string with each matched number replaced by its fixed-point
        representation.
    """
    number_pattern = re.compile(
        r'(?<![\w.])'                  # not glued to an identifier or a preceding dot
        r'-?(?:\d+(?:\.\d+)?|\.\d+)'   # optional sign, then 12 / 12.5 / .5
        r'(?:[eE][+-]?\d+)?'           # optional exponent
        r'(?!\w|\.\d)'                 # not followed by identifier chars or more decimals
    )

    def round_match(match):
        return f"{float(match.group()):.{precision}f}"

    return number_pattern.sub(round_match, text)

def round_json_numbers(json_data, precision=2):
    """Round the numbers inside every string value of each item, in place.

    Each item of ``json_data`` is walked key by key; string values are
    replaced by the result of ``round_numbers_in_text`` and all other values
    are left as they are. The same ``json_data`` object is returned.
    """
    for record in json_data:
        for field in record:
            value = record[field]
            if not isinstance(value, str):
                continue
            record[field] = round_numbers_in_text(value, precision)
    return json_data

In [None]:
# === Usage ===
# Load the JSON file. The encoding is given explicitly (UTF-8, matching the
# cell that writes the dataset) so the read does not depend on the platform's
# default encoding.
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Round numbers in the JSON
rounded_data = round_json_numbers(data, precision=2)

In [None]:
# Save the modified JSON back to the same file. UTF-8 is given explicitly so
# the write does not depend on the platform's default encoding.
with open(file_path, "w", encoding="utf-8") as f:
    json.dump(rounded_data, f, indent=2)