# Obtain Complete Training Data

In [11]:
import zipfile
import os
import sqlite3
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error

In [2]:
# Show every column when a DataFrame is displayed (None = no column limit)
pd.options.display.max_columns = None

### Building-Specific Data

In [4]:
# Building display name -> stem of that building's input CSV ("<stem>.csv").
# NOTE(review): "sefkate_sitesi" does not follow the plain transliteration used by
# the other entries; presumably it matches the actual file name on disk - confirm
# before "correcting" it.
file_name_dict = {
    "El Sanatları": "el_sanatlari",
    "Kadın Sığınma Evi": "kadin_siginma_evi",
    "Spor Salonu": "spor_salonu",
    "Sinema Salonu": "sinema_salonu",
    "Fizik Tedavi": "fizik_tedavi",
    "Dolunay Sitesi": "dolunay_sitesi",
    "Apart B": "apart_b",
    "İdari Bina A Blok": "idari_bina_a_blok",
    "Sevgi Sitesi": "sevgi_sitesi",
    "Apart A": "apart_a",
    "Şefkat Sitesi": "sefkate_sitesi",
    "Çınar Sitesi": "cinar_sitesi",
    "Zümrüt Sitesi": "zumrut_sitesi",
    "Huzur Sitesi": "huzur_sitesi",
    "Umut Sitesi": "umut_sitesi",
    "Papatya Sitesi": "papatya_sitesi",
    "Poliklinik": "poliklinik",
    "İdari Bina B Blok": "idari_bina_b_blok",
    "Güven Sitesi": "guven_sitesi",
}

# Directory that holds one input CSV per building (absolute, machine-specific path)
input_data_folder = r"C:\Users\sb013698\Desktop\github\Deep Model Fusion for UBEM\Create Input Data\Input Data"

In [5]:
# Load a single building's input table to inspect the column layout
building_name = "Poliklinik"
building_file_name = os.path.join(input_data_folder, file_name_dict[building_name] + ".csv")
sample_df = pd.read_csv(building_file_name)
sample_df.head()

Unnamed: 0,Building Name,Archetype,Month,Day,Weekday,Hours,Dry Bulb Temperature {C},Dew Point Temperature {C},Relative Humidity {%},Wind Direction (deg),Wind Speed (m/s),Hourly Total Global Solar Radiation (watt/m2),Liquid Precipitation (mm),Sun Exposure Intensity (cal/cm2),Heating Schedule,Cooling Schedule,DHW Schedule,Equipment Schedule,Lighting Schedule,Occupant Schedule,Natural Ventilation Schedule,Mechanical Ventilation Schedule,DHW Flow Rate (m3/h/m2),EPD (W/m2),LPD (W/m2),Hourly DHW,Hourly Equipment,Hourly Lighting,Hourly Occupancy,Illuminance (lux),Occupant Density (people/m2),Infiltration (ACH),Heating Setpoint (celsius),Cooling Setpoint (celsius),Heating Delta-T,Cooling Delta-T,Wall-U,Roof-U,Ground-U,Window-U,IsCoolingOn,IsMechVentOn,WWR,Actual GFA (m2),Rhino GFA (m2),Floor Count,Height,Volume,Target Hourly Energy (kWh)
0,Poliklinik,Saglik Merkezi,1,1,0,0,1.5,1.5,100,180,0.9,0,0.0,0.0,1,0,1,1,0.6,0.8,0.0,0.0,0.0015,3.0,3.0,0.0015,3.0,1.8,0.08,500,0.1,0.6,24.0,24.1,22.5,0,1.134,2.984,0.591,3.2,0,0,0.429,2826.57,2024,3,9,18216,242.673769
1,Poliklinik,Saglik Merkezi,1,1,0,1,0.5,0.5,100,158,1.2,0,0.0,0.0,1,0,1,1,0.6,0.8,0.0,0.0,0.0015,3.0,3.0,0.0015,3.0,1.8,0.08,500,0.1,0.6,24.0,24.1,23.5,0,1.134,2.984,0.591,3.2,0,0,0.429,2826.57,2024,3,9,18216,247.031961
2,Poliklinik,Saglik Merkezi,1,1,0,2,0.8,0.8,100,180,1.2,0,0.0,0.0,1,0,1,1,0.6,0.8,0.0,0.0,0.0015,3.0,3.0,0.0015,3.0,1.8,0.08,500,0.1,0.6,24.0,24.1,23.2,0,1.134,2.984,0.591,3.2,0,0,0.429,2826.57,2024,3,9,18216,248.745332
3,Poliklinik,Saglik Merkezi,1,1,0,3,1.3,1.0,98,180,1.0,0,0.0,0.0,1,0,1,1,0.6,0.8,0.0,0.0,0.0015,3.0,3.0,0.0015,3.0,1.8,0.08,500,0.1,0.6,24.0,24.1,22.7,0,1.134,2.984,0.591,3.2,0,0,0.429,2826.57,2024,3,9,18216,248.65777
4,Poliklinik,Saglik Merkezi,1,1,0,4,0.6,0.2,97,293,0.9,0,0.0,0.0,1,0,1,1,0.6,0.8,0.0,0.0,0.0015,3.0,3.0,0.0015,3.0,1.8,0.08,500,0.1,0.6,24.0,24.1,23.4,0,1.134,2.984,0.591,3.2,0,0,0.429,2826.57,2024,3,9,18216,249.518468


In [None]:
# Obtain hourly building energy consumption from static scenarios
umi_directory = r"C:\Users\sb013698\Desktop\github\Deep Model Fusion for UBEM\Data Augmentation\UMI Files"
sqlite_directory = r"C:\Users\sb013698\Desktop\github\Deep Model Fusion for UBEM\Data Augmentation\SQLite Files"

scenario_list = np.arange(1, 31)


def extract_scenario_database(scenario_id, umi_dir, sqlite_dir):
    """Copy 'umi.sqlite3' out of one scenario's UMI archive.

    The archive ``S<scenario_id>.umi`` in ``umi_dir`` is opened as a ZIP file and
    its 'umi.sqlite3' member is written to ``sqlite_dir`` as
    ``S<scenario_id>_results.sqlite3``.

    Parameters
    ----------
    scenario_id : int
        Scenario number used to build both file names.
    umi_dir : str
        Directory containing the ``S<id>.umi`` archives.
    sqlite_dir : str
        Destination directory for the extracted databases.

    Returns
    -------
    bool
        True when the database was extracted, False when the archive or the
        'umi.sqlite3' member is missing (a message is printed in that case).
    """
    umi_file_path = os.path.join(umi_dir, f"S{scenario_id}.umi")
    # Path of the output SQLite file for this scenario
    sqlite_file_path = os.path.join(sqlite_dir, f"S{scenario_id}_results.sqlite3")

    if not os.path.exists(umi_file_path):
        print("UMI file for scenario S{} not found.".format(scenario_id))
        return False

    # A .umi file is read here as a ZIP archive
    with zipfile.ZipFile(umi_file_path, 'r') as umi_zip:
        if 'umi.sqlite3' not in umi_zip.namelist():
            print("No 'umi.sqlite3' found in scenario S{}".format(scenario_id))
            return False
        umi_zip.extract('umi.sqlite3', sqlite_dir)

    # os.replace overwrites an existing target on every platform; os.rename raises
    # FileExistsError on Windows when the cell is re-run and the target already exists.
    os.replace(os.path.join(sqlite_dir, 'umi.sqlite3'), sqlite_file_path)
    return True


for scenario_id in scenario_list:
    extract_scenario_database(scenario_id, umi_directory, sqlite_directory)

In [7]:
# Archetype label -> buildings that belong to it
archetype_dict = {
    "Huzurevi": [
        "Kadın Sığınma Evi", "Zümrüt Sitesi", "Umut Sitesi", "Dolunay Sitesi", "Papatya Sitesi",
        "Şefkat Sitesi", "Güven Sitesi", "Sevgi Sitesi", "Çınar Sitesi", "Huzur Sitesi",
    ],
    "Ofis": ["İdari Bina A Blok", "İdari Bina B Blok"],
    "Otel": ["Apart A", "Apart B"],
    "Sosyal Bina": ["El Sanatları", "Sinema Salonu"],
    "Spor Merkezi": ["Spor Salonu"],
    "Saglik Merkezi": ["Fizik Tedavi", "Poliklinik"],
}

# Every building that appears under any archetype, de-duplicated and sorted
all_buildings = sorted({name for members in archetype_dict.values() for name in members})

# Inverse lookup: building -> first archetype (in dict order) that lists it
building_archetypes = {
    name: next(label for label, members in archetype_dict.items() if name in members)
    for name in all_buildings
}

building_archetypes

{'Apart A': 'Otel',
 'Apart B': 'Otel',
 'Dolunay Sitesi': 'Huzurevi',
 'El Sanatları': 'Sosyal Bina',
 'Fizik Tedavi': 'Saglik Merkezi',
 'Güven Sitesi': 'Huzurevi',
 'Huzur Sitesi': 'Huzurevi',
 'Kadın Sığınma Evi': 'Huzurevi',
 'Papatya Sitesi': 'Huzurevi',
 'Poliklinik': 'Saglik Merkezi',
 'Sevgi Sitesi': 'Huzurevi',
 'Sinema Salonu': 'Sosyal Bina',
 'Spor Salonu': 'Spor Merkezi',
 'Umut Sitesi': 'Huzurevi',
 'Zümrüt Sitesi': 'Huzurevi',
 'Çınar Sitesi': 'Huzurevi',
 'İdari Bina A Blok': 'Ofis',
 'İdari Bina B Blok': 'Ofis',
 'Şefkat Sitesi': 'Huzurevi'}

In [8]:
# Folder path where CSV files are stored
# (re-assigned here with the same value that is set earlier in the notebook)
input_data_folder = r"C:\Users\sb013698\Desktop\github\Deep Model Fusion for UBEM\Create Input Data\Input Data"
# NOTE: despite the "_dir" suffix this is the path of a CSV file, not a directory
coordinates_dir = r"C:\Users\sb013698\Desktop\github\Deep Model Fusion for UBEM\Distance Matrix\coordinates.csv"
# Building centroid table; the code below reads its "Building Name",
# "Centroid X" and "Centroid Y" columns
coordinates_df = pd.read_csv(coordinates_dir)

# Static feature columns taken from each building's input CSV, grouped by theme.
# The order of the final list is what fixes the column order of the dataset.
# NOTE(review): CSV columns such as 'LPD (W/m2)', 'Heating Setpoint (celsius)',
# 'Wall-U', 'Roof-U' and 'Window-U' are not selected - presumably because the
# per-scenario values are merged in from the scenario table later; confirm.
_time_columns = ["Month", "Day", "Weekday", "Hours"]

_weather_columns = [
    "Dry Bulb Temperature {C}",
    "Dew Point Temperature {C}",
    "Relative Humidity {%}",
    "Wind Direction (deg)",
    "Wind Speed (m/s)",
    "Hourly Total Global Solar Radiation (watt/m2)",
    "Liquid Precipitation (mm)",
    "Sun Exposure Intensity (cal/cm2)",
]

_schedule_columns = [
    "Heating Schedule",
    "Cooling Schedule",
    "DHW Schedule",
    "Equipment Schedule",
    "Lighting Schedule",
    "Occupant Schedule",
    "Natural Ventilation Schedule",
    "Mechanical Ventilation Schedule",
]

_building_setting_columns = [
    "DHW Flow Rate (m3/h/m2)",
    "Illuminance (lux)",
    "Occupant Density (people/m2)",
    "Infiltration (ACH)",
    "Cooling Setpoint (celsius)",
    "Ground-U",
    "IsCoolingOn",
    "IsMechVentOn",
]

_geometry_columns = ["WWR", "Actual GFA (m2)", "Rhino GFA (m2)", "Floor Count", "Height", "Volume"]

required_columns = (
    _time_columns
    + _weather_columns
    + _schedule_columns
    + _building_setting_columns
    + _geometry_columns
)

# Building display name -> stem of that building's input CSV ("<stem>.csv").
# Repeats the mapping defined earlier in the notebook so this cell is self-contained.
file_name_dict = {
    "El Sanatları": "el_sanatlari",
    "Kadın Sığınma Evi": "kadin_siginma_evi",
    "Spor Salonu": "spor_salonu",
    "Sinema Salonu": "sinema_salonu",
    "Fizik Tedavi": "fizik_tedavi",
    "Dolunay Sitesi": "dolunay_sitesi",
    "Apart B": "apart_b",
    "İdari Bina A Blok": "idari_bina_a_blok",
    "Sevgi Sitesi": "sevgi_sitesi",
    "Apart A": "apart_a",
    "Şefkat Sitesi": "sefkate_sitesi",
    "Çınar Sitesi": "cinar_sitesi",
    "Zümrüt Sitesi": "zumrut_sitesi",
    "Huzur Sitesi": "huzur_sitesi",
    "Umut Sitesi": "umut_sitesi",
    "Papatya Sitesi": "papatya_sitesi",
    "Poliklinik": "poliklinik",
    "İdari Bina B Blok": "idari_bina_b_blok",
    "Güven Sitesi": "guven_sitesi",
}

# Build the augmented dataset: one block of hourly rows per (scenario, building) pair.

scenario_list = np.arange(1, 31)

# Series names the `series` table must list first, in exactly this order
default_end_use_order = [
    'SDL/Cooling',
    'SDL/Heating',
    'SDL/Domestic Hot Water',
    'SDL/Equipment',
    'SDL/Lighting',
    'SDL/Window Radiation',
    'SDL/Total Operational Energy',
    'SDL Energy/Total Area',
]

hour_list = np.arange(1, 8761)  # List of hours in a year

# Number of data points per building and end-use.
# NOTE(review): 7 * 8760 + 1 presumably means seven hourly series plus a single
# value for the eighth entry ('SDL Energy/Total Area') - confirm against the UMI schema.
n_data_per_end_use = 8760
n_data_per_building = 7 * n_data_per_end_use + 1

# Position of the total-energy series inside one building's run of data points
total_energy_slot = default_end_use_order.index('SDL/Total Operational Energy')


def _read_scenario_tables(sqlite_path):
    """Read the three result tables of one scenario database.

    Returns
    -------
    tuple
        ``(building_list, end_use_order, data_point_settings)`` where
        ``building_list`` is column 1 of every ``object_name_assignment`` row,
        ``end_use_order`` is column 1 of the first eight ``series`` rows and
        ``data_point_settings`` is every ``data_point`` row as returned by SQLite.

    Raises
    ------
    FileNotFoundError
        If ``sqlite_path`` does not exist. Without this check sqlite3.connect()
        would silently create an empty database file at that path.
    """
    if not os.path.exists(sqlite_path):
        raise FileNotFoundError(f"SQLite results file not found: {sqlite_path}")

    conn = sqlite3.connect(sqlite_path)
    try:
        cursor = conn.cursor()

        # 1) Object Name
        cursor.execute("SELECT * FROM object_name_assignment;")
        building_list = [row[1] for row in cursor.fetchall()]

        # 2) Series
        cursor.execute("SELECT * FROM series;")
        end_use_order = [row[1] for row in cursor.fetchall()[:8]]

        # 3) Data Point
        cursor.execute("SELECT * FROM data_point;")
        data_point_settings = cursor.fetchall()
    finally:
        # Release the database handle even when a query fails
        conn.close()

    return building_list, end_use_order, data_point_settings


def _load_building_constants(building_name):
    """Return the scenario-independent data of one building.

    Returns ``(archetype, centroid_x, centroid_y, static_features)``;
    ``static_features`` holds the ``required_columns`` of the building's input CSV.

    Raises
    ------
    ValueError
        If the CSV does not contain exactly ``n_data_per_end_use`` rows; the
        column-wise concat below would otherwise pad the gap with NaN silently.
    """
    is_building = coordinates_df["Building Name"] == building_name
    centroid_x = coordinates_df.loc[is_building, "Centroid X"].iat[0]
    centroid_y = coordinates_df.loc[is_building, "Centroid Y"].iat[0]

    csv_path = os.path.join(input_data_folder, f"{file_name_dict[building_name]}.csv")
    static_features = pd.read_csv(csv_path)[required_columns].reset_index(drop=True)
    if len(static_features) != n_data_per_end_use:
        raise ValueError(
            f"{csv_path} has {len(static_features)} rows, expected {n_data_per_end_use}."
        )

    return building_archetypes[building_name], centroid_x, centroid_y, static_features


building_constants = {}  # building name -> result of _load_building_constants (read once, not once per scenario)
dataset_parts = []       # one frame per (scenario, building); concatenated once at the end

for scenario_id in scenario_list:
    sqlite_path = os.path.join(sqlite_directory, f"S{scenario_id}_results.sqlite3")
    building_list, end_use_order, data_point_settings = _read_scenario_tables(sqlite_path)

    # Check conditions: expected series layout and a full set of points for every building
    expected_points = len(building_list) * n_data_per_building
    if end_use_order != default_end_use_order or len(data_point_settings) != expected_points:
        raise ValueError("Mismatch in end-use order or data point counts.")

    for i, building_name in enumerate(building_list):
        if building_name not in building_constants:
            building_constants[building_name] = _load_building_constants(building_name)
        building_archetype, centroid_x, centroid_y, required_data = building_constants[building_name]

        # Rows of this building's total-energy series.
        # NOTE(review): assumes data_point rows come back grouped by building and then
        # by series (no ORDER BY is issued) and that column 2 is the value - confirm.
        first = i * n_data_per_building + total_energy_slot * n_data_per_end_use
        total_energy = [row[2] for row in data_point_settings[first:first + n_data_per_end_use]]
        # Transform 0 values into 1 in total_energy
        total_energy = [value if value != 0 else 1 for value in total_energy]

        # Scenario-specific columns; scalar entries are broadcast over all hours
        building_df = pd.DataFrame({
            "Scenario": f"S{scenario_id}",
            "Building Name": building_name,
            "Archetype": building_archetype,
            "Hour": hour_list,
            "Centroid-X": centroid_x,
            "Centroid-Y": centroid_y,
            "Energy Consumption": total_energy,
        })

        # Put the static features next to the scenario-specific data (both use a 0..N-1 index)
        dataset_parts.append(pd.concat([building_df, required_data], axis=1))

# A single concat avoids re-copying the growing dataset on every iteration
dataset = pd.concat(dataset_parts, ignore_index=True) if dataset_parts else pd.DataFrame()
del dataset_parts

print(f"Shape of the augmented data: {dataset.shape}")

Shape of the augmented data: (4993200, 41)


In [9]:
# Load the scenario data
scenario_file = r"C:\Users\sb013698\Desktop\github\Deep Model Fusion for UBEM\Data Augmentation\static_scenarios.csv"
scenario_data = pd.read_csv(scenario_file)

# Bring the "Scenario" column to the "S<n>" form used in `dataset`.
# The first value is converted to str before the prefix test: when the column is
# numeric (the case this branch exists for) the raw value has no .startswith().
first_label = str(scenario_data['Scenario'].iloc[0])
if not first_label.startswith("S"):
    scenario_data['Scenario'] = scenario_data['Scenario'].apply(lambda x: f"S{x}")

# Merge the "dataset" with scenario_data based on the "Scenario" column.
# validate="many_to_one" raises if a scenario label occurs more than once in
# scenario_data, which would otherwise silently duplicate dataset rows.
dataset = dataset.merge(scenario_data, on="Scenario", how="left", validate="many_to_one")

# Save dataset to a CSV file (written to the current working directory)
dataset.to_csv("complete_training_data.csv", index=False, encoding='utf-8')

print(f"Shape of the complete training data: {dataset.shape}")

Shape of the complete training data: (4993200, 46)


In [10]:
# Fixed seed so the displayed rows are the same on every Restart & Run All
dataset.sample(10, random_state=42)

Unnamed: 0,Scenario,Building Name,Archetype,Hour,Centroid-X,Centroid-Y,Energy Consumption,Month,Day,Weekday,Hours,Dry Bulb Temperature {C},Dew Point Temperature {C},Relative Humidity {%},Wind Direction (deg),Wind Speed (m/s),Hourly Total Global Solar Radiation (watt/m2),Liquid Precipitation (mm),Sun Exposure Intensity (cal/cm2),Heating Schedule,Cooling Schedule,DHW Schedule,Equipment Schedule,Lighting Schedule,Occupant Schedule,Natural Ventilation Schedule,Mechanical Ventilation Schedule,DHW Flow Rate (m3/h/m2),Illuminance (lux),Occupant Density (people/m2),Infiltration (ACH),Cooling Setpoint (celsius),Ground-U,IsCoolingOn,IsMechVentOn,WWR,Actual GFA (m2),Rhino GFA (m2),Floor Count,Height,Volume,lpd,heating_setpoint,wall_u,roof_u,window_u
3189779,S20,Sinema Salonu,Sosyal Bina,1140,72.446224,6.281503,26.598029,2,17,5,11,17.0,0.5,33,203,1.8,30060,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.5,0.5,0.0018,300,0.054,0.6,24.1,0.591,0,0,0.098,608.56,715,2,6,4290,8,20,1.212,0.699,2.4
13563,S1,Sevgi Sitesi,Huzurevi,4804,10.656581,62.488332,164.41833,7,20,4,3,16.1,13.4,84,113,0.1,2400,0.0,0.0,0,0,0.8,0.7,0.2,0.7,0.0,0.0,0.00013,200,0.049,0.3,24.1,0.591,0,0,0.2,5084.63,5069,3,9,45621,8,20,1.212,0.699,2.4
1006796,S7,Poliklinik,Saglik Merkezi,8157,-121.556456,-126.632222,89.134364,12,6,3,20,1.7,1.7,100,158,1.0,0,0.0,0.0,1,0,1.0,1.0,0.6,0.8,0.0,0.0,0.0015,500,0.1,0.6,24.1,0.591,0,0,0.429,2826.57,2024,3,9,18216,10,22,1.212,1.112,2.4
1463221,S9,Apart B,Otel,302,-127.389524,-55.568895,9.935349,1,13,5,13,4.5,0.4,75,0,1.9,3480,0.0,0.0,1,0,0.8,1.0,1.0,0.8,0.5,0.5,0.0011,200,0.054,0.6,24.1,0.591,0,0,0.061,468.42,417,3,9,3753,8,18,0.9,0.699,1.791
2669080,S17,Kadın Sığınma Evi,Huzurevi,6041,78.078195,-190.218451,56.950788,9,9,6,16,20.4,16.0,76,270,0.4,600,0.0,0.0,0,0,0.8,1.0,1.0,1.0,0.5,0.5,0.00013,200,0.049,0.3,24.1,0.591,0,0,0.134,1296.0,1408,3,9,12672,6,20,0.9,0.699,1.2
1558698,S10,Şefkat Sitesi,Huzurevi,8179,78.656926,185.215503,193.484592,12,7,4,18,8.0,5.8,86,158,1.0,0,0.0,0.0,1,0,0.8,1.0,1.0,1.0,0.5,0.5,0.00013,200,0.049,0.3,24.1,0.591,0,0,0.134,5471.04,4354,6,18,78372,10,20,0.602,0.3,2.4
3057186,S19,Çınar Sitesi,Huzurevi,8707,-169.513988,-99.582988,79.564622,12,29,5,18,3.8,3.8,100,338,1.2,0,2.0,2.0,1,0,0.8,1.0,1.0,1.0,0.5,0.5,0.00013,200,0.049,0.3,24.1,0.591,0,0,0.1,2274.26,2070,3,9,18630,4,20,0.9,0.3,2.4
893962,S6,Kadın Sığınma Evi,Huzurevi,443,78.078195,-190.218451,51.317429,1,19,4,10,14.5,5.8,56,248,2.1,27780,0.0,0.0,1,0,0.8,1.0,1.0,1.0,0.5,0.5,0.00013,200,0.049,0.3,24.1,0.591,0,0,0.134,1296.0,1408,3,9,12672,2,18,1.212,0.3,1.791
3224310,S20,Umut Sitesi,Huzurevi,631,-167.983811,-140.771477,63.696763,1,27,5,6,6.0,6.0,100,338,2.6,720,0.7,0.7,1,0,0.8,1.0,1.0,1.0,0.0,0.0,0.00013,200,0.049,0.3,24.1,0.591,0,0,0.1,1873.97,1501,3,9,13509,8,20,1.212,0.699,2.4
230047,S2,İdari Bina B Blok,Ofis,2288,4.862279,-128.947279,31.395959,4,6,4,7,10.0,5.7,75,90,1.3,30120,0.0,0.0,1,0,0.6,1.0,1.0,1.0,0.5,0.5,0.0018,500,0.054,0.6,24.1,0.591,1,0,0.245,1121.64,1618,3,9,14562,8,20,0.3,0.699,2.4


# END