### Importing necessary libraries

In [1]:
import os
import json
import pandas as pd
import random

### Function to extract EnergyPlus objects From IDF files

In [None]:
# Function to extract EnergyPlus objects with strict name matching
def extract_objects(idf_file_path):
    """Extracts strictly matching Zone, Space, SpaceList, BuildingSurface:Detailed, and FenestrationSurface:Detailed objects."""
    with open(idf_file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    current_obj = []
    extracted_objects = {key: [] for key in ["Zone,", "Space,", "SpaceList,", "BuildingSurface:Detailed,", "FenestrationSurface:Detailed,"]}

    inside_object = None  # Track current object type

    for line in lines:
        stripped_line = line.strip()
        if not stripped_line:
            continue  # Skip empty lines

        # Normalize spacing and remove excessive gaps
        clean_line = " ".join(stripped_line.split())  
        clean_line = clean_line.replace(" ,", ",").replace(", ", ",")  # Ensure proper comma spacing

        # Detect new object start
        for key in extracted_objects.keys():
            if clean_line.startswith(key):
                inside_object = key  # Set current object type
                current_obj = [clean_line]  # Start new object
                break
        else:
            # If already inside an object, continue collecting lines
            if inside_object:
                current_obj.append(clean_line)

        # If line ends with ';', complete the object
        if ";" in clean_line and inside_object:
            # Join the current object lines with spaces, then add a newline after the semicolon
            complete_object = " ".join(current_obj)
            extracted_objects[inside_object].append(complete_object)
            inside_object = None  # Reset tracking variable

    return [extracted_objects[key] for key in extracted_objects]

### Various prompt for creating dataset with different queries

In [None]:
# Function to generate a user query
def generate_combined_query(description):
    """Returns a user query asking for a Zone, Space, SpaceList, BuildingSurface, and FenestrationSurface."""
    templates = [
        f"Create an EnergyPlus IDF component including a Zone, Space, and SpaceList along with a building surface and its fenestration."
        f"Building details: FA={description['FA']:.4f} m2, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Generate an IDF snippet defining a Zone, Space, SpaceList, a building surface, and an associated window/aperture."
        f"Building: {description['FA']:.4f} m2, {description['L']:.4f}m x {description['W']:.4f}m x {description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Provide an EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window/aperture."
        f"Details: L={description['L']:.4f}m, W={description['W']:.4f}m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Give me EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window/aperture."
        f"Details: L={description['L']:.4f}m, W={description['W']:.4f}m, H={description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"Make IDF defining a Zone, Space, SpaceList, a building surface, and an associated fenestration."
        f"Building: {description['FA']:.4f} m2, {description['L']:.4f}m x {description['W']:.4f}m x {description['H']:.4f}m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
        f"Construct an EnergyPlus IDF file featuring a Zone, Space, SpaceList, a building surface, and its fenestration.\n"
        f"Specifications: Floor Area={description['FA']:.4f} sq.m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, Aspect Ratio={description['AR']:.4f}, Window-to-Wall Ratio={description['WWR']:.2f}.",
    
        f"Dram an IDF file that includes a Zone, Space, SpaceList, a building surface, and a window or other fenestration element."
        f"Building properties: {description['FA']:.4f} sq.m total, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Generate an EnergyPlus IDF section containing a Zone, Space, SpaceList, a building surface with fenestration, and necessary geometric details."
        f"Dimensions: {description['L']:.4f} m (L) x {description['W']:.4f} m (W) x {description['H']:.4f} m (H), Total Area={description['FA']:.4f} sq.m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Create a valid IDF file for EnergyPlus that defines a Zone, Space, SpaceList, a building envelope surface, and an opening such as a window."
        f"Given parameters: FA={description['FA']:.4f} square m, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Write an IDF script containing a Zone, Space, SpaceList, an building surface, and its corresponding fenestration."
        f"Input specifications: Floor Area={description['FA']:.4f} square m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
        
        f"Design an IDF file that incorporates a Zone, Space, SpaceList, a key building surface, and its fenestration for EnergyPlus simulation."
        f"Parameters: Floor Area={description['FA']:.4f} square feet, L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Prepare an EnergyPlus IDF definition that includes a Zone, Space, SpaceList, an building surface, and an associated window or aperture."
        f"Building configuration: {description['FA']:.4f} square feet, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Write an IDF file for a building with a Zone, Space, SpaceList, a building surface, and an opening fenestration for EnergyPlus analysis."
        f"Specifications: L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, FA={description['FA']:.4f} sq. feet, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Develop an EnergyPlus IDF file that defines a Zone, Space, SpaceList, building surface and fenestration element, and other necessary details."
        f"Inputs: Floor Area={description['FA']:.4f} sq m, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Formulate an IDF snippet that includes a Zone, Space, SpaceList, building surface, and a window or glazing element."
        f"Building parameters: {description['FA']:.4f} sq m total, Dimensions: {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",

        f"How can I generate an EnergyPlus IDF file that includes a Zone, Space, SpaceList, a building surface, and a window?"
        f"My building details are: Floor Area={description['FA']:.4f} SF, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, Aspect Ratio={description['AR']:.4f}, and Window-to-Wall Ratio={description['WWR']:.2f}.",
    
        f"Can you provide an IDF file containing a Zone, Space, SpaceList, building surface, and its fenestration?"
        f"The building parameters are: {description['FA']:.4f} SF total, {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"What would an EnergyPlus IDF look like for a building with a Zone, Space, SpaceList, a building surface, and fenestration?"
        f"Here are my specifications: L={description['L']:.4f} m, W={description['W']:.4f} m, H={description['H']:.4f} m, FA={description['FA']:.4f} m2, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"Can you help me create an IDF snippet for EnergyPlus that includes a Zone, Space, SpaceList, a building surface, and a glazing element?"
        f"The building details are: {description['FA']:.4f} sq m , Dimensions: {description['L']:.4f} m x {description['W']:.4f} m x {description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    
        f"What is the correct IDF structure for defining a Zone, Space, SpaceList, an building surface, and a fenestration in EnergyPlus?"
        f"My building specifications are: Floor Area={description['FA']:.4f} m2, Length={description['L']:.4f} m, Width={description['W']:.4f} m, Height={description['H']:.4f} m, AR={description['AR']:.4f}, WWR={description['WWR']:.2f}.",
    ]
    return random.choice(templates)

### Choose the file path

All the data related to design matrix based on which simulations are done are in Excel (csv) file. They are data about L, W, CH, FA, WWR, AR, V and EUI
Idf file from where EnergyPlus objects will be extracted from are in idf_rectangle folder.

In [12]:
# Define file paths
excel_file_path = r'C:\Users\Jayedi Aman\OneDrive - University of Missouri\Desktop\LLM Eplus\idf_rectangle\data.csv'
idf_folder_path = r'C:\Users\Jayedi Aman\OneDrive - University of Missouri\Desktop\LLM Eplus\idf_rectangle'
output_json_path = 'BuildingGeometricDataset.json'

### Creating dictionary 
Dictionary of ata containing key= ID or design matrix, value = set of {L, W, CH, WWR, AR, FA, V, EUI}

In [13]:
# Load building descriptions
if not os.path.exists(excel_file_path):
    raise FileNotFoundError(f"CSV file not found: {excel_file_path}")

df = pd.read_csv(excel_file_path)
building_data = df.set_index('ID').to_dict(orient='index')

building_data

{1: {'L': 28.1515,
  'W': 15.6465,
  'H': 2.44,
  'WWR': 0.55,
  'AR': 1.8,
  'FA': 440.45,
  'V': 1074.75,
  'EUI': 158.68},
 2: {'L': 43.737,
  'W': 16.5005,
  'H': 3.66,
  'WWR': 0.19,
  'AR': 2.65,
  'FA': 721.67,
  'V': 2641.36,
  'EUI': 144.8},
 3: {'L': 21.0755,
  'W': 15.2805,
  'H': 2.9,
  'WWR': 0.15,
  'AR': 1.38,
  'FA': 322.0,
  'V': 933.93,
  'EUI': 144.8},
 4: {'L': 27.7855,
  'W': 12.81,
  'H': 2.48,
  'WWR': 0.21,
  'AR': 2.17,
  'FA': 355.91,
  'V': 882.71,
  'EUI': 143.85},
 5: {'L': 38.491,
  'W': 16.958,
  'H': 2.89,
  'WWR': 0.41,
  'AR': 2.27,
  'FA': 652.64,
  'V': 1886.39,
  'EUI': 150.79},
 6: {'L': 31.4455,
  'W': 18.605,
  'H': 2.53,
  'WWR': 0.16,
  'AR': 1.69,
  'FA': 585.01,
  'V': 1480.16,
  'EUI': 134.7},
 7: {'L': 30.7135,
  'W': 15.677,
  'H': 3.42,
  'WWR': 0.42,
  'AR': 1.96,
  'FA': 481.42,
  'V': 1646.71,
  'EUI': 164.35},
 8: {'L': 26.962,
  'W': 13.0235,
  'H': 3.33,
  'WWR': 0.17,
  'AR': 2.07,
  'FA': 351.08,
  'V': 1169.29,
  'EUI': 151.1},
 

### Creating the dataset with Query and Answer

###

In [14]:
# Initialize JSON pairs list
json_pairs = []

# Process each IDF file
for idf_file in os.listdir(idf_folder_path):
    if idf_file.endswith('.idf') and idf_file.startswith("in"):
        idf_file_path = os.path.join(idf_folder_path, idf_file)

        try:
            building_id = int(idf_file.replace("in", "").replace(".idf", ""))
            description = building_data.get(building_id)
            if not description:
                print(f"Warning: No description found for {idf_file}")
                continue
        except ValueError:
            print(f"Skipping {idf_file} - Invalid filename format.")
            continue

        # Extract EnergyPlus objects
        zones, spaces, space_lists, building_surfaces, fenestration_surfaces = extract_objects(idf_file_path)

        # Debugging: Print number of extracted components
        print(f"Processing {idf_file}: Zones({len(zones)}), Spaces({len(spaces)}), "
              f"SpaceLists({len(space_lists)}), Surfaces({len(building_surfaces)}), Windows({len(fenestration_surfaces)})")

        # Ensure at least some components exist
        if any([zones, spaces, space_lists, building_surfaces, fenestration_surfaces]):
            json_pairs.append({
                "user": generate_combined_query(description),
                "assistant": "\n".join(zones + spaces + space_lists + building_surfaces + fenestration_surfaces)
            })
        else:
            print(f"Skipping {idf_file} - No relevant objects found.")


Processing in1.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in10.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in100.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in101.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in102.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in103.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in104.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in105.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in106.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in107.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in108.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in109.idf: Zones(1), Spaces(1), SpaceLists(1), Surfaces(6), Windows(4)
Processing in11.idf

In [15]:
# Save dataset to JSON
with open(output_json_path, 'w', encoding='utf-8') as json_file:
    json.dump(json_pairs, json_file, indent=2)

print(f"Fine-tuning dataset saved to {output_json_path}")

Fine-tuning dataset saved to BuildingGeometricDataset.json


### Cleaning the json file

In [None]:
## Making 0.2 or two significant digitsimport json
import re
file_path = r'C:\Users\Jayedi Aman\OneDrive - University of Missouri\Desktop\LLM Eplus\BuildingGeometricDataset.json'

def round_numbers_in_text(text, precision=2):
    # Match float or int numbers in string, including negatives
    number_pattern = re.compile(r'-?\d+\.\d+|-?\d+')

    def round_match(match):
        num = float(match.group())
        return f"{num:.{precision}f}"

    return number_pattern.sub(round_match, text)

def round_json_numbers(json_data, precision=2):
    for item in json_data:
        for key in item:
            if isinstance(item[key], str):
                item[key] = round_numbers_in_text(item[key], precision)
    return json_data

In [17]:
# === Usage ===
# Load your JSON file
with open(file_path, "r") as f:
    data = json.load(f)

# Round numbers in the JSON
rounded_data = round_json_numbers(data, precision=2)

In [18]:
# Save the modified JSON
with open(file_path, "w") as f:
    json.dump(rounded_data, f, indent=2)