### Importing necessary libraries

In [None]:
import os
import json
import pandas as pd
import random

### Function to extract EnergyPlus objects from IDF files

In [None]:
import re

def extract_objects(idf_file_path):
    """Extract Zone, Space, SpaceList and detailed surface objects from an IDF file.

    Comments (everything after ``!``) are dropped, whitespace is collapsed and
    the lines of each object are joined with single spaces, so every returned
    object is one line ending at its ``;`` terminator.

    An object header is only recognised on the first non-empty line after a
    ``;`` terminator (or at the top of the file). This keeps a field whose
    value happens to equal a tracked type name (for example a zone that is
    literally named ``Zone``) from being mistaken for the start of a new
    object.

    Args:
        idf_file_path: Path of the IDF file, read as UTF-8.

    Returns:
        A list of five lists of strings, in this order: Zone, Space,
        SpaceList, BuildingSurface:Detailed, FenestrationSurface:Detailed.
    """
    object_keys = (
        "Zone,", "Space,", "SpaceList,",
        "BuildingSurface:Detailed,", "FenestrationSurface:Detailed,",
    )
    extracted_objects = {key: [] for key in object_keys}

    current_obj = []
    inside_object = None    # key of the tracked object being collected, if any
    at_object_start = True  # True when the next non-empty line begins an object

    with open(idf_file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            # Remove comments (everything after '!')
            line = raw_line.split('!')[0].strip()
            if not line:
                continue  # Skip empty or comment-only lines

            # Normalize spacing
            clean_line = " ".join(line.split())
            clean_line = clean_line.replace(" ,", ",").replace(", ", ",")

            if at_object_start:
                # Header line: decide whether this object is one we collect.
                inside_object = next(
                    (key for key in object_keys if clean_line.startswith(key)),
                    None,
                )
                current_obj = [clean_line] if inside_object else []
            elif inside_object:
                current_obj.append(clean_line)

            # A ';' closes the current object, tracked or not.
            terminated = ";" in clean_line
            if terminated and inside_object:
                extracted_objects[inside_object].append(" ".join(current_obj).strip())
                inside_object = None
            at_object_start = terminated

    return [extracted_objects[key] for key in object_keys]

### Various prompts for creating the dataset with different queries

In [None]:
# Function to generate a user query
def generate_combined_query(description):
    """Build the user prompt asking for the geometry objects of one building.

    The prompt requests a Zone, Space, SpaceList, six BuildingSurface:Detailed
    objects and four FenestrationSurface:Detailed objects for a rectangular
    building with the given dimensions.

    Args:
        description: Mapping with the keys 'L', 'W', 'H', 'FA', 'AR' and
            'WWR'. Values may be numbers or numeric strings; they are
            converted with ``float`` before being formatted.

    Returns:
        One prompt string picked with ``random.choice`` from the active
        templates (currently a single template).

    Raises:
        KeyError: If one of the keys above is missing.
        ValueError, TypeError: If a value cannot be converted to float.
    """
    # Convert once and format the converted values below, so that numeric
    # strings (e.g. values read from a CSV without dtype conversion) do not
    # fail on the ':.2f' format spec.
    L = float(description['L'])
    W = float(description['W'])
    H = float(description['H'])
    # FA and AR are only referenced by the disabled templates below; they are
    # still converted so a missing or malformed value is reported here.
    FA = float(description['FA'])
    AR = float(description['AR'])
    WWR = float(description['WWR'])
    templates = [
        #  f"Generate an EnergyPlus IDF for the geometric parameters with " \
        #  f"Length of {description['L']:.2f} meter, Width of {description['W']:.2f} meter and Height of {description['H']:.2f} meter and " \
        #  f"window-to-wall ratio to each of four Wall, WWR is {description['WWR']:.2f} " \
        #  f"Define one Zone, one Space, and one SpaceList. " \
        #  f"There will be six BuildingSurface:Detailed objects such that four Wall, one Roof, one Floor. " \
        #  f"There will be total four FenestrationSurface:Detailed (window) objects and each of them centered on each wall. " \
        #  f"Use meters as units. " \
        #  f"Ensure windows fit fully inside walls and do not overlap edges. " \
        #  f"Output only the IDF code snippet with no comments or extra text."

        # Active template (adjacent f-strings are concatenated implicitly).
        f"Create an EnergyPlus IDF snippet for a rectangular building with "
        f"Length {L:.2f} m, Width {W:.2f} m, and Height {H:.2f} m. "
        f"Apply a window-to-wall ratio (WWR) of {WWR:.2f} to each of the four walls. "
        f"Define exactly one Zone, one Space, and one SpaceList. "
        f"Include six BuildingSurface:Detailed objects (four Walls, one Roof, one Floor). "
        f"Add four FenestrationSurface:Detailed objects (windows), each centered on its respective wall. "
        f"Use meters as the unit system. "
        f"Ensure each window fits entirely within its wall boundaries and does not overlap edges. "
        f"Return only the IDF code snippet, with no extra explanation or comments."

        # f"""
        # Generate an EnergyPlus IDF for the geometric parameters with\
        # Length of {description['L']:.2f} meter, Width of {description['W']:.2f} meter and Height of {description['H']:.2f} meter and\
        # window-to-wall ratio to each of four Wall, WWR is {description['WWR']:.2f}\
        # Define one Zone, one Space, and one SpaceList.\
        # There will be six BuildingSurface:Detailed objects such that four Wall, one Roof, one Floor.\
        # Four of the Wall objects must be named 'zone1.00_Space - Wall xxx:a' where xxx is in degrees (000, 090, 180, 270).\
        # There will be total four FenestrationSurface:Detailed (window) objects and each of them centered on each wall.\
        # Window names must be 'zone1.00_Space - Wall xxx:a - Sub:a', linked to Wall.\
        # Window area equals corresponding to wall area times WWR\
        # Use meters as units.\
        # Ensure windows fit fully inside walls and do not overlap edges.\
        # Output only the IDF code snippet with no comments or extra text.
        # """

    #     f"""
    #     Create an EnergyPlus IDF snippet with the following requirements:

    #     - One Zone, one Space, and one SpaceList named consistently.
    #     - Six BuildingSurface:Detailed objects representing four walls, one floor, and one roof.
    #     - Four FenestrationSurface:Detailed windows, each centered on one of the four walls.
    #     - Walls named as 'zone1.00_Space - Wall angle:.4f:a' with angles 0, 90, 180, 270 degrees.
    #     - Windows named as 'zone1.00_Space - Wall angle:.4f:a - Sub:a', linked to the corresponding wall.
    #     - Window area = wall area * WWR
    #     - Vertices for all surfaces start at (0.0, 0.0, 0.0) and are ordered clockwise.
    #     - Windows must be rectangular, centered, and fully inside wall boundaries.
    #     - Use metric units (meters) for all coordinates.
    #     - Only output the correctly formatted IDF objects, no additional explanation.

    #     Building Parameters:
    #     - Length = {description['L']:.2f} m
    #     - Width = {description['W']:.2f} m
    #     - Height = {description['H']:.2f} m
    #     - Floor Area = {description['FA']:.2f} m²
    #     - Aspect Ratio = {description['AR']:.2f}
    #     - Window-to-Wall Ratio (WWR) = {description['WWR']:.2f}
    #    """
    #    f"""
    #     Create an EnergyPlus IDF snippet with:
    #     - One Zone, Space, and SpaceList
    #     - Six BuildingSurface:Detailed surfaces (4 walls, roof, floor)
    #     - Four FenestrationSurface:Detailed windows centered on walls

    #     Step 1: Define wall vertices clockwise from (0,0,0) using length (L), width (W), and height (H).
    #     Step 2: Calculate wall centroids.
    #     Step 3: Calculate window area = WWR * wall area.
    #     Step 4: Calculate window width and height = wall width/height * sqrt(WWR).
    #     Step 5: Center windows on wall centroids and define vertices accordingly.
    #     Step 6: Ensure windows fit inside walls, coplanar and clockwise.

    #     Building Parameters:
    #     - Length = {description['L']:.2f} m
    #     - Width = {description['W']:.2f} m
    #     - Height = {description['H']:.2f} m
    #     - Floor Area = {description['FA']:.2f} m²
    #     - Aspect Ratio = {description['AR']:.2f}
    #     - Window-to-Wall Ratio (WWR) = {description['WWR']:.2f}
    #    Include only geometry-related IDF objects.
    #    """
    # f"""
    # Create an EnergyPlus IDF snippet for a rectangular building with these requirements.
    # Building Parameters:
    # - Length = {description['L']:.2f} m
    # - Width = {description['W']:.2f} m
    # - Height = {description['H']:.2f} m
    # - Floor Area = {description['FA']:.2f} m²
    # - Aspect Ratio = {description['AR']:.2f}
    # - Window-to-Wall Ratio (WWR) = {description['WWR']:.2f}

    # Requirements:
    # 1. Define exactly 1 Zone, 1 Space, and 1 SpaceList.
    # 2. Create 6 BuildingSurface:Detailed objects:
    # - 4 walls,
    # - 1 roof,
    # - 1 floor.
    # 3. Add one rectangular FenestrationSurface:Detailed window centered on each wall.
    # - Window size based on WWR and wall geometry.
    # 4. Geometry Rules:
    # - Vertices start at (0.00, 0.00, 0.00) and proceed clockwise.
    # - Units are meters (SI).
    # - Roof elevation Z = building Height.
    # - Floor elevation Z = 0.00.
    # 5. Calculate for each wall:
    # - Wall area = Wall Width * Wall Height.
    # - Window area = WWR * Wall Area.
    # - Window Width = Wall Width * sqrt(WWR).
    # - Window Height = Wall Height * sqrt(WWR).
    # - Center the window on wall centroid.
    # - Ensure windows fully fit inside wall and are coplanar, clockwise vertex order.
    # 6. Use consistent naming conventions for walls and windows.
    # 7. Output only the IDF geometry snippet with Zone, Space, SpaceList, BuildingSurface:Detailed, and FenestrationSurface:Detailed objects.

    # Include only geometry-related IDF objects.
    # """
    ]
    return random.choice(templates)

### Choose the file path

All the data for the design matrix on which the simulations are based are stored in a CSV file (exported from Excel). It holds L, W, CH, FA, WWR, AR, V and EUI for each design.
The IDF files from which the EnergyPlus objects are extracted are in the idf_rectangle folder.

In [None]:
# Define file paths
# CSV with the design matrix; it is read with pandas and indexed by its 'ID' column further below.
excel_file_path = r"C:\Users\Desktop\LLM\idf_rectangle\data.csv"
# Folder that is scanned for IDF files whose names start with "in" and end with ".idf".
idf_folder_path = r'C:\Users\Desktop\LLM\idf_rectangle'
# Output dataset file; this is a relative path, so it is resolved against the current working directory.
output_json_path = 'BuildingGeometricDataset1.json'

### Creating dictionary 
Dictionary of data with key = ID in the design matrix, value = set of {L, W, CH, WWR, AR, FA, V, EUI}

In [None]:
# Read the design matrix; fail early with a clear message if the CSV is absent.
if not os.path.exists(excel_file_path):
    raise FileNotFoundError(f"CSV file not found: {excel_file_path}")

df = pd.read_csv(excel_file_path)

# Force the numeric design parameters to float.
float_columns = ['L', 'W', 'H', 'WWR', 'AR', 'EUI', 'FA']
df = df.astype({column: float for column in float_columns})

# One entry per building: {ID: {column name: value, ...}}
indexed_df = df.set_index('ID')
building_data = indexed_df.to_dict(orient='index')

# Last expression of the cell, so the notebook displays the dictionary.
building_data

### Creating the dataset with Query and Answer

###

In [None]:
import random

# Set the random seed so the template choice is repeatable between runs
random.seed(123)

# Initialize JSON pairs list
json_pairs = []

# Process each IDF file.
# os.listdir() returns entries in arbitrary order, so the listing is sorted:
# otherwise the seeded random choices would be paired with different files
# (and the dataset written in a different order) from one machine to another.
for idf_file in sorted(os.listdir(idf_folder_path)):
    if not (idf_file.endswith('.idf') and idf_file.startswith("in")):
        continue
    idf_file_path = os.path.join(idf_folder_path, idf_file)

    # The building ID is the number between the "in" prefix and ".idf".
    try:
        building_id = int(idf_file.replace("in", "").replace(".idf", ""))
    except ValueError:
        print(f"Skipping {idf_file} - Invalid filename format.")
        continue

    description = building_data.get(building_id)
    if not description:
        print(f"Warning: No description found for {idf_file}")
        continue

    # Extract EnergyPlus objects
    zones, spaces, space_lists, building_surfaces, fenestration_surfaces = extract_objects(idf_file_path)

    # Debugging: Print number of extracted components
    print(f"Processing {idf_file}: Zones({len(zones)}), Spaces({len(spaces)}), "
          f"SpaceLists({len(space_lists)}), Surfaces({len(building_surfaces)}), Windows({len(fenestration_surfaces)})")

    # Ensure at least some components exist
    if any([zones, spaces, space_lists, building_surfaces, fenestration_surfaces]):
        json_pairs.append({
            "user": generate_combined_query(description),
            # NOTE(review): objects are concatenated with no separator, so one
            # object's ';' is immediately followed by the next object's type
            # name - confirm this is the intended answer format.
            "assistant": "".join(zones + spaces + space_lists + building_surfaces + fenestration_surfaces)
        })
    else:
        print(f"Skipping {idf_file} - No relevant objects found.")


In [None]:
# Write the collected query/answer pairs as pretty-printed JSON (UTF-8).
with open(output_json_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json.dumps(json_pairs, indent=2))

print(f"Fine-tuning dataset saved to {output_json_path}")

### Cleaning the json file

In [None]:
## Round every decimal number in the dataset to two decimal places (e.g. 0.2 -> 0.20)
# `import json` had been fused into the comment above; it is restored here so
# this cleaning cell also runs on its own in a fresh kernel.
import json
import re
# Dataset file that the cells below load, round and overwrite in place.
file_path = r"C:\Users\Desktop\LLM\BuildingGeometricDataset1.json"

def round_numbers_in_text(text, precision=2):
    """Rewrite every decimal number in *text* with a fixed number of decimals.

    Only numbers that contain a decimal point are touched; plain integers are
    left as they are. A value that rounds to zero is written without a sign
    ("0.00" rather than "-0.00"), unless the '-' is directly attached to a
    preceding token and is therefore a hyphen rather than a sign.

    Args:
        text: The string to process.
        precision: Number of digits to keep after the decimal point.

    Returns:
        A new string with the numbers reformatted.
    """
    # Match only floating point numbers (with decimals), including negatives
    number_pattern = re.compile(r'-?\d+\.\d+')

    def round_match(match):
        rounded = f"{float(match.group()):.{precision}f}"
        if rounded.startswith("-") and float(rounded) == 0:
            start = match.start()
            previous = match.string[start - 1] if start > 0 else ""
            # In "Wall-0.001" or "1.5-0.001" the '-' separates two tokens;
            # dropping it would glue them together, so keep it there.
            is_hyphen = previous.isalnum() or previous in ("_", ".", ")")
            if not is_hyphen:
                rounded = rounded[1:]
        return rounded

    return number_pattern.sub(round_match, text)

def round_json_numbers(json_data, precision=2):
    """Round the decimal numbers inside every string value of each record.

    Each element of *json_data* is updated in place: string values are passed
    through ``round_numbers_in_text``; values of any other type are left
    untouched. The same (mutated) *json_data* object is returned.
    """
    for record in json_data:
        # Collect the keys first, then rewrite only the string-valued entries.
        text_fields = [field for field in record if isinstance(record[field], str)]
        for field in text_fields:
            record[field] = round_numbers_in_text(record[field], precision)
    return json_data

In [None]:
# === Usage ===
# Load your JSON file.
# The encoding is given explicitly (the dataset is written as UTF-8 above)
# instead of relying on the platform's default text encoding.
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Round numbers in the JSON (string values are rewritten in place)
rounded_data = round_json_numbers(data, precision=2)

In [None]:
# Save the modified JSON, overwriting the file that was loaded.
# Explicit UTF-8 keeps the file encoding independent of the platform default.
with open(file_path, "w", encoding="utf-8") as f:
    json.dump(rounded_data, f, indent=2)

# Alternative prompt phrasings (single-surface variant).
# NOTE(review): this list is built at module level from whatever `description`
# is currently bound to (e.g. the last one left over from the dataset loop),
# and generate_combined_query() builds its own local list instead of reading
# this one - confirm whether these prompts are still meant to be used.
# Each prompt is three adjacent f-strings; the first two end with a separator
# so the sentences do not run together after implicit concatenation.
# Floor area is labelled in square metres throughout, matching the metre
# dimensions used in the same prompts.
templates = [
        f"Create an EnergyPlus IDF component including a Zone, Space, and SpaceList along with a building surface and its fenestration. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Building details: FA={description['FA']:.4f} m2, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Generate an IDF snippet defining a Zone, Space, SpaceList, a building surface, and an associated window/aperture. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Building: {description['FA']:.4f} m2, {description['L']:.4f}m x {description['W']:.4f}m x {description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Provide an EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window/aperture. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Details: L={description['L']:.4f}m, W={description['W']:.4f}m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Give me EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window/aperture. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Details: L={description['L']:.4f}m, W={description['W']:.4f}m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Make IDF defining a Zone, Space, SpaceList, a building surface, and an associated fenestration. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Building: {description['FA']:.4f} m2, {description['L']:.4f}m x {description['W']:.4f}m x {description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Construct an EnergyPlus IDF file featuring a Zone, Space, SpaceList, a building surface, and its fenestration.\n"
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Specifications: Floor Area={description['FA']:.4f} sq.m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, Aspect Ratio={description['AR']:.4f}, Window-to-Wall Ratio={description['WWR']:.2f}.",

        f"Draft an IDF file that includes a Zone, Space, SpaceList, a building surface, and a window or other fenestration element. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Building properties: {description['FA']:.4f} sq.m total, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Generate an EnergyPlus IDF section containing a Zone, Space, SpaceList, a building surface with fenestration, and necessary geometric details. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Dimensions: {description['L']:.4f} m (L) x {description['W']:.4f} m (W) x {description['H']:.4f} m (H), Total Area={description['FA']:.4f} sq.m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Create a valid IDF file for EnergyPlus that defines a Zone, Space, SpaceList, a building envelope surface, and an opening such as a window. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Given parameters: FA={description['FA']:.4f} square m, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Write an IDF script containing a Zone, Space, SpaceList, a building surface, and its corresponding fenestration. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Input specifications: Floor Area={description['FA']:.4f} square m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Design an IDF file that incorporates a Zone, Space, SpaceList, a key building surface, and its fenestration for EnergyPlus simulation. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Parameters: Floor Area={description['FA']:.4f} square m, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Prepare an EnergyPlus IDF definition that includes a Zone, Space, SpaceList, a building surface, and an associated window or aperture. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Building configuration: {description['FA']:.4f} square m, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Write an IDF file for a building with a Zone, Space, SpaceList, a building surface, and an opening fenestration for EnergyPlus analysis. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Specifications: L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, FA={description['FA']:.4f} sq. m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Develop an EnergyPlus IDF file that defines a Zone, Space, SpaceList, building surface and fenestration element, and other necessary details. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Inputs: Floor Area={description['FA']:.4f} sq m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Formulate an IDF snippet that includes a Zone, Space, SpaceList, building surface, and a window or glazing element. "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Building parameters: {description['FA']:.4f} sq m total, Dimensions: {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"How can I generate an EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window? "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"My building details are: Floor Area={description['FA']:.4f} m2, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, Aspect Ratio={description['AR']:.4f}, and Window-to-Wall Ratio={description['WWR']:.2f}.",

        f"Can you provide an IDF file containing a Zone, Space, SpaceList, building surface, and its fenestration? "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"The building parameters are: {description['FA']:.4f} m2 total, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"What would an EnergyPlus IDF look like for a building with a Zone, Space, SpaceList, a building surface, and fenestration? "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"Here are my specifications: L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, FA={description['FA']:.4f} m2, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Can you help me create an IDF snippet for EnergyPlus that includes a Zone, Space, SpaceList, a building surface, and a glazing element? "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"The building details are: {description['FA']:.4f} sq m, Dimensions: {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"What is the correct IDF structure for defining a Zone, Space, SpaceList, a building surface, and a fenestration in EnergyPlus? "
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall. "
        f"My building specifications are: Floor Area={description['FA']:.4f} m2, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    ]