### Importing necessary libraries

In [1]:
import os
import json
import pandas as pd
import random

### Function to extract EnergyPlus objects From IDF files

In [2]:
import re

def extract_objects(idf_file_path):
    """Extracts strictly matching Zone, Space, SpaceList, BuildingSurface:Detailed, and FenestrationSurface:Detailed objects."""
    with open(idf_file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    current_obj = []
    extracted_objects = {key: [] for key in [
        "Zone,", "Space,", "SpaceList,", 
        "BuildingSurface:Detailed,", "FenestrationSurface:Detailed,"
    ]}

    inside_object = None  # Track current object type

    for line in lines:

        # Remove comments (everything after '!')
        # line = line.strip() # including comments
        line = line.split('!')[0].strip()
        if not line:
            continue  # Skip empty or comment-only lines

        # Normalize spacing
        clean_line = " ".join(line.split())  
        clean_line = clean_line.replace(" ,", ",").replace(", ", ",")

        # Detect start of a new object
        for key in extracted_objects.keys():
            if clean_line.startswith(key):
                inside_object = key
                current_obj = [clean_line]
                break
        else:
            if inside_object:
                current_obj.append(clean_line)

        # If line ends with ';', finalize the object
        if ";" in clean_line and inside_object:
            complete_object = " ".join(current_obj)
            extracted_objects[inside_object].append(complete_object)
            inside_object = None

    return [extracted_objects[key] for key in extracted_objects]

### Various prompt for creating dataset with different queries

In [3]:
# Function to generate a user query
def generate_combined_query(description):
    """Returns a user query asking for a Zone, Space, SpaceList, BuildingSurface, and FenestrationSurface."""
    L = float(description['L'])
    W = float(description['W'])
    H = float(description['H'])
    FA = float(description['FA'])
    AR = float(description['AR'])
    WWR = float(description['WWR'])
    templates = [
        f"Create an EnergyPlus IDF component including a Zone, Space, and SpaceList along with a building surface and its fenestration."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Building details: FA={description['FA']:.4f} m2, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Generate an IDF snippet defining a Zone, Space, SpaceList, a building surface, and an associated window/aperture."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Building: {description['FA']:.4f} m2, {description['L']:.4f}m x {description['W']:.4f}m x {description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Provide an EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window/aperture."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Details: L={description['L']:.4f}m, W={description['W']:.4f}m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Give me EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window/aperture."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Details: L={description['L']:.4f}m, W={description['W']:.4f}m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Make IDF defining a Zone, Space, SpaceList, a building surface, and an associated fenestration."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Building: {description['FA']:.4f} m2, {description['L']:.4f}m x {description['W']:.4f}m x {description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Construct an EnergyPlus IDF file featuring a Zone, Space, SpaceList, a building surface, and its fenestration.\n"
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Specifications: Floor Area={description['FA']:.4f} sq.m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, Aspect Ratio={description['AR']:.4f}, Window-to-Wall Ratio={description['WWR']:.2f}.",
    
        f"Dram an IDF file that includes a Zone, Space, SpaceList, a building surface, and a window or other fenestration element."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Building properties: {description['FA']:.4f} sq.m total, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Generate an EnergyPlus IDF section containing a Zone, Space, SpaceList, a building surface with fenestration, and necessary geometric details."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Dimensions: {description['L']:.4f} m (L) x {description['W']:.4f} m (W) x {description['H']:.4f} m (H), Total Area={description['FA']:.4f} sq.m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Create a valid IDF file for EnergyPlus that defines a Zone, Space, SpaceList, a building envelope surface, and an opening such as a window."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Given parameters: FA={description['FA']:.4f} square m, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Write an IDF script containing a Zone, Space, SpaceList, an building surface, and its corresponding fenestration."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Input specifications: Floor Area={description['FA']:.4f} square m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
        
        f"Design an IDF file that incorporates a Zone, Space, SpaceList, a key building surface, and its fenestration for EnergyPlus simulation."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Parameters: Floor Area={description['FA']:.4f} square feet, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Prepare an EnergyPlus IDF definition that includes a Zone, Space, SpaceList, an building surface, and an associated window or aperture."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Building configuration: {description['FA']:.4f} square feet, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Write an IDF file for a building with a Zone, Space, SpaceList, a building surface, and an opening fenestration for EnergyPlus analysis."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Specifications: L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, FA={description['FA']:.4f} sq. feet, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Develop an EnergyPlus IDF file that defines a Zone, Space, SpaceList, building surface and fenestration element, and other necessary details."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Inputs: Floor Area={description['FA']:.4f} sq m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Formulate an IDF snippet that includes a Zone, Space, SpaceList, building surface, and a window or glazing element."
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Building parameters: {description['FA']:.4f} sq m total, Dimensions: {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"How can I generate an EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window?"
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"My building details are: Floor Area={description['FA']:.4f} SF, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, Aspect Ratio={description['AR']:.4f}, and Window-to-Wall Ratio={description['WWR']:.2f}.",
    
        f"Can you provide an IDF file containing a Zone, Space, SpaceList, building surface, and its fenestration?"
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"The building parameters are: {description['FA']:.4f} SF total, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"What would an EnergyPlus IDF look like for a building with a Zone, Space, SpaceList, a building surface, and fenestration?"
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"Here are my specifications: L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, FA={description['FA']:.4f} m2, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Can you help me create an IDF snippet for EnergyPlus that includes a Zone, Space, SpaceList, a building surface, and a glazing element?"
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"The building details are: {description['FA']:.4f} sq m , Dimensions: {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"What is the correct IDF structure for defining a Zone, Space, SpaceList, an building surface, and a fenestration in EnergyPlus?"
        f"Assume the surface vertices begin at (0,0,0) and follow clockwise order. Include window placement on one wall."
        f"My building specifications are: Floor Area={description['FA']:.4f} m2, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    ]
    return random.choice(templates)

### Choose the file path

All the data related to design matrix based on which simulations are done are in Excel (csv) file. They are data about L, W, CH, FA, WWR, AR, V and EUI
Idf file from where EnergyPlus objects will be extracted from are in idf_rectangle folder.

In [4]:
# Define file paths
excel_file_path = r"C:\Users\Desktop\LLM\idf_rectangle\data.csv"
idf_folder_path = r'C:\Users\Desktop\LLM\idf_rectangle'
output_json_path = 'BuildingGeometricDataset1.json'

### Creating dictionary 
Dictionary of ata containing key= ID or design matrix, value = set of {L, W, CH, WWR, AR, FA, V, EUI}

In [5]:
# Load building descriptions
if not os.path.exists(excel_file_path):
    raise FileNotFoundError(f"CSV file not found: {excel_file_path}")

df = pd.read_csv(excel_file_path)
# Convert all necessary columns to float explicitly
float_columns = ['L', 'W', 'H', 'WWR', 'AR', 'EUI', 'FA']
df[float_columns] = df[float_columns].astype(float)

building_data = df.set_index('ID').to_dict(orient='index')

building_data

{1: {'L': 30.06,
  'W': 15.03,
  'H': 11.95,
  'WWR': 0.19,
  'AR': 2.0,
  'EUI': 267.3,
  'FA': 451.8},
 2: {'L': 33.58,
  'W': 23.0,
  'H': 9.86,
  'WWR': 0.2,
  'AR': 1.46,
  'EUI': 216.0,
  'FA': 772.3},
 3: {'L': 36.76,
  'W': 18.95,
  'H': 9.1,
  'WWR': 0.21,
  'AR': 1.94,
  'EUI': 217.3,
  'FA': 696.6},
 4: {'L': 38.69,
  'W': 19.44,
  'H': 11.45,
  'WWR': 0.28,
  'AR': 1.99,
  'EUI': 251.0,
  'FA': 752.1},
 5: {'L': 32.27,
  'W': 24.26,
  'H': 9.91,
  'WWR': 0.18,
  'AR': 1.33,
  'EUI': 212.5,
  'FA': 782.9},
 6: {'L': 15.81,
  'W': 10.54,
  'H': 10.77,
  'WWR': 0.22,
  'AR': 1.5,
  'EUI': 338.1,
  'FA': 166.6},
 7: {'L': 25.39,
  'W': 14.03,
  'H': 9.97,
  'WWR': 0.15,
  'AR': 1.81,
  'EUI': 247.7,
  'FA': 356.2},
 8: {'L': 27.77,
  'W': 20.88,
  'H': 10.73,
  'WWR': 0.22,
  'AR': 1.33,
  'EUI': 240.9,
  'FA': 579.8},
 9: {'L': 41.29,
  'W': 21.73,
  'H': 10.33,
  'WWR': 0.19,
  'AR': 1.9,
  'EUI': 214.4,
  'FA': 897.2},
 10: {'L': 16.42,
  'W': 15.06,
  'H': 8.88,
  'WWR': 0.

### Creating the dataset with Query and Answer

###

In [6]:
import random

# Set the random seed
random.seed(42)

# Initialize JSON pairs list
json_pairs = []

# Process each IDF file
for idf_file in os.listdir(idf_folder_path):
    if idf_file.endswith('.idf') and idf_file.startswith("in"):
        idf_file_path = os.path.join(idf_folder_path, idf_file)

        try:
            building_id = int(idf_file.replace("in", "").replace(".idf", ""))
            description = building_data.get(building_id)
            if not description:
                print(f"Warning: No description found for {idf_file}")
                continue
        except ValueError:
            print(f"Skipping {idf_file} - Invalid filename format.")
            continue

        # Extract EnergyPlus objects
        zones, spaces, space_lists, building_surfaces, fenestration_surfaces = extract_objects(idf_file_path)

        # Debugging: Print number of extracted components
        print(f"Processing {idf_file}: Zones({len(zones)}), Spaces({len(spaces)}), "
              f"SpaceLists({len(space_lists)}), Surfaces({len(building_surfaces)}), Windows({len(fenestration_surfaces)})")

        # Ensure at least some components exist
        if any([zones, spaces, space_lists, building_surfaces, fenestration_surfaces]):
            json_pairs.append({
                "user": generate_combined_query(description),
                "assistant": "\n".join(zones + spaces + space_lists + building_surfaces + fenestration_surfaces)
            })
        else:
            print(f"Skipping {idf_file} - No relevant objects found.")


Processing in1.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in10.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in100.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1000.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1001.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1002.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1003.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1004.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1005.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1006.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1007.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in1008.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing

In [7]:
# Save dataset to JSON
with open(output_json_path, 'w', encoding='utf-8') as json_file:
    json.dump(json_pairs, json_file, indent=2)

print(f"Fine-tuning dataset saved to {output_json_path}")

Fine-tuning dataset saved to BuildingGeometricDataset1.json


### Cleaning the json file

In [8]:
## Making 0.2 or two significant digitsimport json
import re
file_path = r"C:\Users\Desktop\LLM\BuildingGeometricDataset1.json"

def round_numbers_in_text(text, precision=2):
    # Match float or int numbers in string, including negatives
    number_pattern = re.compile(r'-?\d+\.\d+|-?\d+')

    def round_match(match):
        num = float(match.group())
        return f"{num:.{precision}f}"

    return number_pattern.sub(round_match, text)

def round_json_numbers(json_data, precision=2):
    for item in json_data:
        for key in item:
            if isinstance(item[key], str):
                item[key] = round_numbers_in_text(item[key], precision)
    return json_data

In [9]:
# === Usage ===
# Load your JSON file
with open(file_path, "r") as f:
    data = json.load(f)

# Round numbers in the JSON
rounded_data = round_json_numbers(data, precision=2)

In [10]:
# Save the modified JSON
with open(file_path, "w") as f:
    json.dump(rounded_data, f, indent=2)