### Importing necessary libraries

In [None]:
import os
import json
import pandas as pd
import random

### Function to extract EnergyPlus objects from IDF files

In [None]:
def extract_all_objects_clean_comments(idf_file_path):
    """Extract every object from an IDF file as a single-line string.

    On each line, everything from the first '!' onward is treated as a comment
    and dropped; lines that are empty afterwards are skipped. The remaining
    text is cut into objects at every ';' terminator, so several objects that
    share one physical line are returned separately. The pieces that make up
    one object are stripped and joined with single spaces.

    Args:
        idf_file_path: Path of the IDF file. The file is read as UTF-8.

    Returns:
        list[str]: One string per object, in file order. Each string ends with
        ';', except for a trailing fragment that was never terminated, which
        is still returned so that no content is lost.
    """
    extracted_objects = []
    current_obj = []

    # Stream the file line by line instead of loading it all with readlines().
    with open(idf_file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            # Remove comments (everything after '!')
            clean_line = raw_line.split('!')[0].strip()
            if not clean_line:
                continue  # skip empty lines after comment removal

            # Every segment before a ';' closes an object; whatever follows
            # the last ';' on the line belongs to the next object.
            *closed_segments, remainder = clean_line.split(';')
            for segment in closed_segments:
                current_obj.append((segment + ';').strip())
                extracted_objects.append(" ".join(current_obj))
                current_obj = []

            remainder = remainder.strip()
            if remainder:
                current_obj.append(remainder)

    # Keep leftover text that was never closed with ';' (rare)
    if current_obj:
        extracted_objects.append(" ".join(current_obj))

    return extracted_objects

### Various prompts for creating a dataset with different queries

In [None]:
# Function to generate a user query
def generate_combined_query(description):
    """Return a randomly chosen natural-language request for an IDF file.

    Each query has three sentences separated by single spaces: an opening
    request, a fixed statement of the geometric assumptions, and the building
    parameters formatted from ``description``.

    Args:
        description: Mapping with the keys 'L', 'W', 'H', 'FA', 'AR' and
            'WWR'. Values may be numbers or numeric strings; they are
            converted with ``float`` before formatting.

    Returns:
        str: One of 20 query templates, selected with ``random.choice`` (so
        the result depends on the state of the global ``random`` generator).

    Raises:
        KeyError: If a required key is missing from ``description``.
        ValueError: If a value cannot be converted to ``float``.
    """
    # Convert once and format the converted values, so numeric strings work
    # with the float format specs below.
    L = float(description['L'])
    W = float(description['W'])
    H = float(description['H'])
    FA = float(description['FA'])
    AR = float(description['AR'])
    WWR = float(description['WWR'])

    # Sentence shared by every template.
    assumption = (
        "Assume the surface vertices begin at (0,0,0) and follow clockwise order. "
        "Include window placement on one wall."
    )

    # NOTE(review): a few templates label FA as square feet / SF while every
    # length is labelled in metres and the value is never converted — confirm
    # the intended unit of FA before relying on those templates.
    # The number and order of templates is kept fixed so that a given random
    # seed keeps selecting the same template position.
    templates = [
        ("Create an EnergyPlus IDF component.",
         f"Building details: FA={FA:.4f} m2, L={L:.4f} m, W={W:.4f} m, H={H:.4f}m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Generate an IDF snippet.",
         f"Building: {FA:.4f} m2, {L:.4f}m x {W:.4f}m x {H:.4f}m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Provide an EnergyPlus IDF file.",
         f"Details: L={L:.4f}m, W={W:.4f}m, H={H:.4f}m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Give me EnergyPlus IDF file.",
         f"Details: L={L:.4f}m, W={W:.4f}m, H={H:.4f}m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Make IDF file.",
         f"Building: {FA:.4f} m2, {L:.4f}m x {W:.4f}m x {H:.4f}m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Construct an EnergyPlus IDF file.",
         f"Specifications: Floor Area={FA:.4f} sq.m, Length={L:.4f} m, Width={W:.4f} m, Height={H:.4f} m, Aspect Ratio={AR:.4f}, Window-to-Wall Ratio={WWR:.2f}."),

        ("Draw an IDF file.",
         f"Building properties: {FA:.4f} sq.m total, {L:.4f} m x {W:.4f} m x {H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Generate an EnergyPlus IDF section.",
         f"Dimensions: {L:.4f} m (L) x {W:.4f} m (W) x {H:.4f} m (H), Total Area={FA:.4f} sq.m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Create a valid IDF file for EnergyPlus.",
         f"Given parameters: FA={FA:.4f} square m, L={L:.4f} m, W={W:.4f} m, H={H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Write an IDF script.",
         f"Input specifications: Floor Area={FA:.4f} square m, Length={L:.4f} m, Width={W:.4f} m, Height={H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Design an IDF file.",
         f"Parameters: Floor Area={FA:.4f} square feet, L={L:.4f} m, W={W:.4f} m, H={H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Prepare an EnergyPlus IDF.",
         f"Building configuration: {FA:.4f} square feet, {L:.4f} m x {W:.4f} m x {H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Write an IDF file.",
         f"Specifications: L={L:.4f} m, W={W:.4f} m, H={H:.4f} m, FA={FA:.4f} sq. feet, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Develop an EnergyPlus IDF file.",
         f"Inputs: Floor Area={FA:.4f} sq m, Length={L:.4f} m, Width={W:.4f} m, Height={H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Formulate an IDF snippet.",
         f"Building parameters: {FA:.4f} sq m total, Dimensions: {L:.4f} m x {W:.4f} m x {H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("How can I generate an EnergyPlus IDF file?",
         f"My building details are: Floor Area={FA:.4f} SF, Length={L:.4f} m, Width={W:.4f} m, Height={H:.4f} m, Aspect Ratio={AR:.4f}, and Window-to-Wall Ratio={WWR:.2f}."),

        ("Can you provide an IDF file?",
         f"The building parameters are: {FA:.4f} SF total, {L:.4f} m x {W:.4f} m x {H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("What would an EnergyPlus IDF look like?",
         f"Here are my specifications: L={L:.4f} m, W={W:.4f} m, H={H:.4f} m, FA={FA:.4f} m2, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("Can you help me create an IDF snippet?",
         f"The building details are: {FA:.4f} sq m , Dimensions: {L:.4f} m x {W:.4f} m x {H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),

        ("What is the correct IDF structure?",
         f"My building specifications are: Floor Area={FA:.4f} m2, Length={L:.4f} m, Width={W:.4f} m, Height={H:.4f} m, AR={AR:.4f}, WWR={WWR:.2f}."),
    ]

    opening, details = random.choice(templates)
    # Join with explicit spaces so the three sentences never run together.
    return f"{opening} {assumption} {details}"

### Choose the file path

All the data for the design matrix on which the simulations are based are in an Excel (CSV) file. The columns are L, W, CH, FA, WWR, AR, V and EUI.
The IDF files from which the EnergyPlus objects are extracted are in the idf_rectangle folder.

In [None]:
# Define file paths
# Folder holding the IDF files (in<ID>.idf) and the design-matrix CSV.
idf_folder_path = r'C:\Users\Desktop\LLM\idf_rectangle'
# The CSV lives inside the IDF folder; derive its path from the folder so the
# directory is written only once and the two paths cannot drift apart.
excel_file_path = os.path.join(idf_folder_path, 'data.csv')
# Output dataset; a relative path, so it is resolved against the current working directory.
output_json_path = 'BuildingGeomFull1.json'

### Creating dictionary 
Dictionary of data with key = ID of the design matrix entry and value = the set of {L, W, CH, WWR, AR, FA, V, EUI}

In [None]:
# Load building descriptions
if not os.path.exists(excel_file_path):
    raise FileNotFoundError(f"CSV file not found: {excel_file_path}")

df = pd.read_csv(excel_file_path)

# Fail early with a readable message if the CSV does not have the columns
# used below ('ID' becomes the dictionary key, the rest are cast to float).
float_columns = ['L', 'W', 'H', 'WWR', 'AR', 'EUI', 'FA']
missing_columns = [col for col in ['ID', *float_columns] if col not in df.columns]
if missing_columns:
    raise KeyError(f"CSV file is missing required columns: {missing_columns}")

# Convert all necessary columns to float explicitly
df[float_columns] = df[float_columns].astype(float)

# {ID: {column: value, ...}} for every row of the CSV
building_data = df.set_index('ID').to_dict(orient='index')

# Preview a few entries instead of dumping the whole dictionary into the cell output
dict(list(building_data.items())[:5])

### Creating the dataset with Query and Answer

###

In [None]:
# Set the random seed so the query template drawn for each building is repeatable
random.seed(42)

# Initialize JSON pairs list
json_pairs = []

# Process each IDF file.
# os.listdir() returns names in arbitrary order; sorting them means that,
# together with the fixed seed, every run pairs the same template with the
# same file.
for idf_file in sorted(os.listdir(idf_folder_path)):
    if not (idf_file.startswith("in") and idf_file.endswith(".idf")):
        continue

    # File names look like "in<ID>.idf": cut off exactly the prefix and the
    # suffix rather than replacing every "in" that occurs in the name.
    try:
        building_id = int(idf_file[len("in"):-len(".idf")])
    except ValueError:
        print(f"Skipping {idf_file} - Invalid filename format.")
        continue

    description = building_data.get(building_id)
    if not description:
        print(f"Warning: No description found for {idf_file}")
        continue

    # Extract EnergyPlus objects
    idf_file_path = os.path.join(idf_folder_path, idf_file)
    all_objects = extract_all_objects_clean_comments(idf_file_path)

    print(f"Processing {idf_file}: TotalObjects({len(all_objects)})")

    if not all_objects:
        print(f"Skipping {idf_file} - No IDF objects found.")
        continue

    # One training pair per building: the generated request and the full
    # comment-free IDF text, one object per line.
    json_pairs.append({
        "user": generate_combined_query(description),
        "assistant": "\n".join(all_objects)
    })

In [None]:
# Write the collected query/answer pairs to disk as indented JSON
with open(output_json_path, 'w', encoding='utf-8') as json_file:
    # Serialise to a string, then write it in one call
    json_file.write(json.dumps(json_pairs, indent=2))

print(f"Fine-tuning dataset saved to {output_json_path}")

### Cleaning the json file

In [None]:
## Round every number in the saved dataset to two decimal places
import re
# NOTE(review): this path is hard-coded separately from `output_json_path`,
# which is where the dataset is written — confirm both refer to the same file.
file_path = r"C:\Users\Desktop\LLM\BuildingGeomFull1.json"

def round_numbers_in_text(text, precision=2):
    """Rewrite every standalone number in ``text`` with a fixed number of decimals.

    A number is an optionally negative integer or decimal, optionally followed
    by an exponent (``1.5e-3``), which is parsed as a single value. Digits
    that directly follow a letter, digit, underscore or dot are left alone,
    so identifiers and unit labels such as ``Zone1`` or ``m2`` are not
    rewritten. A unit written directly after a number (``3.14159m``) does not
    prevent the number from being rounded.

    Standalone integers are also reformatted (``7`` becomes ``7.00`` with the
    default precision).

    Args:
        text: String to process.
        precision: Number of digits after the decimal point.

    Returns:
        str: ``text`` with each matched number replaced by its fixed-point
        representation.
    """
    # (?<![\w.])  -> do not start inside a word or right after a dot
    # optional exponent -> keep scientific notation together as one number
    number_pattern = re.compile(r'(?<![\w.])-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?')

    def round_match(match):
        return f"{float(match.group()):.{precision}f}"

    return number_pattern.sub(round_match, text)

def round_json_numbers(json_data, precision=2):
    """Round the numbers inside every string value of each record.

    Each record in ``json_data`` is updated in place: values that are strings
    are passed through ``round_numbers_in_text``; all other values are left
    untouched.

    Args:
        json_data: Iterable of records (dictionaries).
        precision: Number of decimals forwarded to ``round_numbers_in_text``.

    Returns:
        The same ``json_data`` object that was passed in.
    """
    for record in json_data:
        # Collect the keys first; values are only reassigned, never added or removed.
        text_keys = [field for field in record if isinstance(record[field], str)]
        for field in text_keys:
            record[field] = round_numbers_in_text(record[field], precision)
    return json_data

In [None]:
# === Usage ===
# Load the JSON file, reading it with the same UTF-8 encoding the dataset was
# saved with instead of the platform default
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Round numbers in the JSON. String values are updated in place, so
# `rounded_data` is the same list object as `data`.
rounded_data = round_json_numbers(data, precision=2)

In [None]:
# Save the modified JSON back to the same file, written explicitly as UTF-8
# so the result does not depend on the platform default encoding
with open(file_path, "w", encoding="utf-8") as f:
    json.dump(rounded_data, f, indent=2)