# Retrofitting

In [39]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
import itertools
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Data Processing

In [3]:
model_files_dir = r"C:\Users\said_\OneDrive\Masaüstü\Finance Project\Training\Model Files"

In [4]:
hybrid_data = pd.read_csv(os.path.join(model_files_dir, "hybrid_data.csv"))
hybrid_data.head()

Unnamed: 0,Energy Consumption,Scenario,Building Name,Archetype,Month,Day,Weekday,Hours,Dry Bulb Temperature {C},Dew Point Temperature {C},Relative Humidity {%},Wind Direction (deg),Wind Speed (m/s),Hourly Total Global Solar Radiation (watt/m2),Liquid Precipitation (mm),Sun Exposure Intensity (cal/cm2),Heating Schedule,Cooling Schedule,DHW Schedule,Equipment Schedule,Lighting Schedule,Occupant Schedule,Natural Ventilation Schedule,Mechanical Ventilation Schedule,DHW Flow Rate (m3/h/m2),Illuminance (lux),Occupant Density (people/m2),Infiltration (ACH),Cooling Setpoint (celsius),Ground-U,IsCoolingOn,IsMechVentOn,lpd,heating_setpoint,wall_u,roof_u,window_u,WWR,Rhino GFA (m2),Floor Count,Height,Volume,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,embedding_9,embedding_10,embedding_11,embedding_12,embedding_13,embedding_14,embedding_15,embedding_16
0,163.857098,S1,Güven Sitesi,Huzurevi,1,1,0,0,1.5,1.5,100,180,0.9,0,0.0,0.0,1,0,0.8,0.7,0.2,0.7,0.0,0.0,0.00013,200,0.049,0.3,24.1,0.591,0,0,8,20,1.212,0.699,2.4,0.2,5051,3,9,45459,0.394599,-1.807778,-0.745936,0.352861,0.776236,-0.579632,-0.194288,-1.101295,-1.568504,-0.632454,0.253306,-1.22487,0.287431,0.643635,1.766967,0.835914
1,163.857098,S1,Güven Sitesi,Huzurevi,1,1,0,1,0.5,0.5,100,158,1.2,0,0.0,0.0,1,0,0.8,0.7,0.2,0.7,0.0,0.0,0.00013,200,0.049,0.3,24.1,0.591,0,0,8,20,1.212,0.699,2.4,0.2,5051,3,9,45459,0.394599,-1.807778,-0.745936,0.352861,0.776236,-0.579632,-0.194288,-1.101295,-1.568504,-0.632454,0.253306,-1.22487,0.287431,0.643635,1.766967,0.835914
2,163.857098,S1,Güven Sitesi,Huzurevi,1,1,0,2,0.8,0.8,100,180,1.2,0,0.0,0.0,1,0,0.8,0.7,0.2,0.7,0.0,0.0,0.00013,200,0.049,0.3,24.1,0.591,0,0,8,20,1.212,0.699,2.4,0.2,5051,3,9,45459,0.394599,-1.807778,-0.745936,0.352861,0.776236,-0.579632,-0.194288,-1.101295,-1.568504,-0.632454,0.253306,-1.22487,0.287431,0.643635,1.766967,0.835914
3,163.857098,S1,Güven Sitesi,Huzurevi,1,1,0,3,1.3,1.0,98,180,1.0,0,0.0,0.0,1,0,0.8,0.7,0.2,0.7,0.0,0.0,0.00013,200,0.049,0.3,24.1,0.591,0,0,8,20,1.212,0.699,2.4,0.2,5051,3,9,45459,0.394599,-1.807778,-0.745936,0.352861,0.776236,-0.579632,-0.194288,-1.101295,-1.568504,-0.632454,0.253306,-1.22487,0.287431,0.643635,1.766967,0.835914
4,163.857098,S1,Güven Sitesi,Huzurevi,1,1,0,4,0.6,0.2,97,293,0.9,0,0.0,0.0,1,0,0.8,0.7,0.2,0.7,0.0,0.0,0.00013,200,0.049,0.3,24.1,0.591,0,0,8,20,1.212,0.699,2.4,0.2,5051,3,9,45459,0.394599,-1.807778,-0.745936,0.352861,0.776236,-0.579632,-0.194288,-1.101295,-1.568504,-0.632454,0.253306,-1.22487,0.287431,0.643635,1.766967,0.835914


In [5]:
hybrid_data.shape

(4993200, 58)

In [6]:
# Calculate correlation with the target column 'Energy Consumption'
correlation = hybrid_data.corr(numeric_only=True)["Energy Consumption"]

# Take absolute values and sort in descending order
sorted_correlation = correlation.abs().sort_values(ascending=False)
sorted_correlation

Energy Consumption                               1.000000
Rhino GFA (m2)                                   0.958359
Volume                                           0.907957
DHW Flow Rate (m3/h/m2)                          0.610184
Floor Count                                      0.558923
Height                                           0.558923
Infiltration (ACH)                               0.525436
Occupant Schedule                                0.463452
Equipment Schedule                               0.387833
Illuminance (lux)                                0.387470
DHW Schedule                                     0.381630
Lighting Schedule                                0.322086
IsCoolingOn                                      0.289431
Heating Schedule                                 0.183580
IsMechVentOn                                     0.165646
Occupant Density (people/m2)                     0.137317
Cooling Schedule                                 0.086035
Natural Ventil

In [7]:
# Define targets and features
target_feature = ['Energy Consumption']

# Define identity features
identity_features = ['Scenario', 'Building Name', 'Archetype']

# Define time-dependent data for LSTM
time_dependent_features = [
    'Month', 'Day', 'Weekday', 'Hours', 'Dry Bulb Temperature {C}',
    'Dew Point Temperature {C}', 'Relative Humidity {%}',
    'Wind Direction (deg)', 'Wind Speed (m/s)',
    'Hourly Total Global Solar Radiation (watt/m2)',
    'Liquid Precipitation (mm)', 'Sun Exposure Intensity (cal/cm2)',
    'Heating Schedule', 'Cooling Schedule', 'DHW Schedule',
    'Equipment Schedule', 'Lighting Schedule', 'Occupant Schedule',
    'Natural Ventilation Schedule', 'Mechanical Ventilation Schedule',
] 

# Define static data for feed-forward NN
static_features = [
    'DHW Flow Rate (m3/h/m2)', 'Illuminance (lux)',
    'Occupant Density (people/m2)', 'Infiltration (ACH)',
    'Cooling Setpoint (celsius)', 'Ground-U', 'IsCoolingOn', 'IsMechVentOn',
    'lpd', 'heating_setpoint', 'wall_u', 'roof_u', 'window_u',
    'WWR','Rhino GFA (m2)', 'Floor Count', 'Height', 'Volume',
]

# Define geometrical data for graph embeddings
geometrical_features = [
    'Centroid-X', 'Centroid-Y', 'Rhino GFA (m2)', 
    'Floor Count', 'Height', 'Volume',
]

In [8]:
joint_df = hybrid_data.copy()

# Initialize an empty dictionary to store building names and their indexes
building_indexes = {}

# Iterate over unique building names
for building_name in joint_df["Building Name"].unique():
    
    # Get all unique scenarios for the current building
    scenarios = joint_df[joint_df["Building Name"]==building_name]["Scenario"].unique()
    
    # Select up a sample scenario
    for scenario in ["S1"]:
        
        # Get indexes for the current building name
        indexes = joint_df[(joint_df["Building Name"]==building_name) & (joint_df["Scenario"]==scenario)].index.tolist()
        # Add the building name and its corresponding indexes to the dictionary
        building_indexes[(building_name, scenario)] = indexes

In [9]:
joint_df = hybrid_data.copy()

# Remove redundant columns before creating sequences
redundant_columns = ["Scenario", "Building Name", "Month", "Day", "Weekday", "Hours"]
joint_df = joint_df.drop(columns=redundant_columns)

# Load the original column order
column_order_file_name = "column_order.pkl"
with open(os.path.join(model_files_dir, column_order_file_name), "r", encoding="utf-8") as file:
    column_order = [line.strip() for line in file]

# Make sure the joint_df has the original column order
joint_df = joint_df[column_order]

old_column_names = set(joint_df.columns)

# Apply one-hot-encoding
joint_df = pd.get_dummies(joint_df)
new_column_names = set(joint_df.columns)
new_column_names = new_column_names - old_column_names


# Define columns for time-dependent features
dynamic_columns = target_feature + time_dependent_features + list(new_column_names)
datetime_features = ['Month', 'Day', 'Weekday', 'Hours'] # Remove datetime features from the dynamic_column
dynamic_columns = [feature for feature in dynamic_columns if feature not in datetime_features]
dynamic_columns = list(set(dynamic_columns)) # Make sure there is no duplicates

# Embedding features
emdedding_dim = 16
embedding_features = [f"embedding_{index}" for index in range(1, emdedding_dim+1)]

# Define columns for static features
static_columns = static_features + embedding_features + list(new_column_names) # Make sure there is no targets and sequences
static_columns = list(set(static_columns)) # Make sure there is no duplicates

# Define functions

In [14]:
def create_sequences(building_name, data, building_indexes, pre_trained_scaler, scenario):

    sequence_length = 6

    # Get teh data for the given building
    index_list = building_indexes[(building_name, "S1")]
    current_df = data.loc[index_list, :]

    # Define parameter values for the given retrofit scenario
    wall_u, roof_u, window_u, lpd, hsp = scenario

    # Define baseline combinations: Wall-U, Roof-U, LPD, Heating Setpoint
    current_df["wall_u"] = wall_u
    current_df["roof_u"] = roof_u
    current_df["window_u"] = window_u
    current_df["lpd"] = lpd
    current_df["heating_setpoint"] = hsp

    # Scale the current df using StandardScaler() except the target column: 'Energy Consumption'
    columns_not_scaled = ['Energy Consumption']
    numerical_columns = current_df.select_dtypes(include=['float64', 'int64']).columns.tolist()
    # Exclude the columns that should not be scaled
    numerical_columns = [col for col in numerical_columns if col not in columns_not_scaled]
    subset_to_scale = current_df[numerical_columns] # Extract the columns for scaling
    # Define a pre-trained scaler
    scaler = pre_trained_scaler
    scaled_subset = scaler.transform(subset_to_scale)
    # Replace the original numerical columns with scaled values in the DataFrame
    current_df[numerical_columns] = scaled_subset

    # Add an if statement to check if the current_df has the same columns with the original df 
    # after one-hot-endoding and scaling

    # Create sequences and targets for the given retrofit scenario  
    sequences_lstm = list()
    sequences_mlp = list()
    targets = list()

    for i in range(len(current_df)-sequence_length):

        # Create sequences for time-dependent features
        seq_lstm = current_df.iloc[i:(i+sequence_length)][dynamic_columns].values
        sequences_lstm.append(seq_lstm)
        
        # Create sequences for static features
        seq_mlp = current_df.iloc[(i+sequence_length)][static_columns].values
        sequences_mlp.append(seq_mlp)
        
        # Assuming the target column is at index 0
        label = current_df.iloc[(i+sequence_length), 0]
        targets.append(label)

    # Convert sequences and targets to numpy arrays
    sequences_lstm = np.array(sequences_lstm, dtype=np.float32)
    sequences_mlp = np.array(sequences_mlp, dtype=np.float32)
    targets = np.array(targets, dtype=np.float32)

    return sequences_lstm, sequences_mlp, targets

def get_training_data(sequences_lstm, sequences_mlp, targets):
    
    # Use 100% of the data as the test set (no training or validation)
    test_size = 0.999
    
    # Splitting LSTM data (100% test data)
    X_lstm_train, X_lstm_test, y_train, y_test = train_test_split(
        sequences_lstm, targets, test_size=test_size, shuffle=False,
    )
    
    # Splitting MLP data (100% test data)
    X_mlp_train, X_mlp_test = train_test_split(
        sequences_mlp, test_size=test_size, shuffle=False,
    )

    batch_size = 1440

    # Define train-validation-test sets using TensorFlow    
    test_dataset = tf.data.Dataset.from_tensor_slices(
        ((X_lstm_test, X_mlp_test), y_test)
    ).batch(batch_size)

    return test_dataset

def make_predictions(dataset, model):
    
    # Predict the scenario results using the pre-trained hybrid model
    preds = model.predict(dataset, verbose=0)
    
    return preds

# Retrofit Analysis

### Building-specific retrofitting

In [11]:
# Define a directory to store the building-specific retrofitting results
retrofitting_dir = r"C:\Users\said_\OneDrive\Masaüstü\Finance Project\Training\Retrofit Results"

In [12]:
# Create retrofit scenarios
# Define parameter values for retrofit scenarios
wall_u_values = [0.3, 0.6, 0.9]
roof_u_values = [0.3, 0.7]
window_u_values = [0.8, 1.0, 1.2]
lpd_values = [1.5, 2.0, 2.5]
heating_setpoints = [21, 22, 23]

# Generate all possible combinations of parameter values
retrofit_scenarios = list(
    itertools.product(
        wall_u_values,
        roof_u_values,
        window_u_values,
        lpd_values,
        heating_setpoints,
    )
)

print("Number of scenarios:", len(retrofit_scenarios))
retrofit_scenarios[:5]

Number of scenarios: 162


[(0.3, 0.3, 0.8, 1.5, 21),
 (0.3, 0.3, 0.8, 1.5, 22),
 (0.3, 0.3, 0.8, 1.5, 23),
 (0.3, 0.3, 0.8, 2.0, 21),
 (0.3, 0.3, 0.8, 2.0, 22)]

In [17]:
file_name_dict = {
    'El Sanatları': 'el_sanatlari',
    'Kadın Sığınma Evi': 'kadin_siginma_evi',
    'Spor Salonu': 'spor_salonu',
    'Sinema Salonu': 'sinema_salonu',
    'Fizik Tedavi': 'fizik_tedavi',
    'Dolunay Sitesi': 'dolunay_sitesi',
    'Apart B': 'apart_b',
    'İdari Bina A Blok': 'idari_bina_a_blok',
    'Sevgi Sitesi': 'sevgi_sitesi',
    'Apart A': 'apart_a',
    'Şefkat Sitesi': 'sefkate_sitesi',
    'Çınar Sitesi': 'cinar_sitesi',
    'Zümrüt Sitesi': 'zumrut_sitesi',
    'Huzur Sitesi': 'huzur_sitesi',
    'Umut Sitesi': 'umut_sitesi',
    'Papatya Sitesi': 'papatya_sitesi',
    'Poliklinik': 'poliklinik',
    'İdari Bina B Blok': 'idari_bina_b_blok',
    'Güven Sitesi': 'guven_sitesi',
}

# Define baseline combinations: Wall-U, Roof-U, Window-U, LPD, Heating Setpoint
baseline_dict = {
    "Huzurevi": [1.475, 2.984, 3.062, 5, 24],
    "Ofis": [0.449, 0.313, 2.1, 4, 24],
    "Otel": [1.429, 2.984, 3.4, 4, 24],
    "Sosyal Bina": [1.467, 2.984, 3.4, 3, 24],
    "Spor Merkezi": [1.389, 2.984, 4.0, 3, 24],
    "Saglik Merkezi": [1.134, 2.984, 3.2, 3, 24],
}

# Load the pre-trained hybrid model
keras_model_dir = r"C:\Users\said_\OneDrive\Masaüstü\Finance Project\Training\saved_model"
hybrid_model = load_model(os.path.join(keras_model_dir, "hybrid_model.keras"))

# Load the pre-trained scaler object
with open(os.path.join(model_files_dir, "scaler.pkl"), "rb") as f:
    loaded_scaler = pickle.load(f)

# Define a directory to store the building-specific retrofitting results
retrofitting_dir = r"C:\Users\said_\OneDrive\Masaüstü\Finance Project\Training\Retrofit Results"

for building_name, file_name in file_name_dict.items():

    archetype = hybrid_data[hybrid_data["Building Name"]==building_name]["Archetype"].unique().item()

    # Store scenario results for each building
    scenario_results = pd.DataFrame(columns=[
        'Building Name', 'Archetype', 'Scenario', 'Wall-U', 'Roof-U', 'Window-U',
        'LPD', 'Heating Setpoint', 'Annual Energy Consumption',
    ])

    # Obtain baseline results
    baseline_combo = baseline_dict[archetype]
    # Create sequences and test data for the baseline model
    baseline_sequences_lstm, baseline_sequences_mlp, targets = create_sequences(
        building_name, joint_df, building_indexes, loaded_scaler, baseline_combo,
    )
    baseline_test_data = get_training_data(baseline_sequences_lstm, baseline_sequences_mlp, targets)
    # Predict the annual operational energy consumption of the baseline scenario
    baseline_preds = make_predictions(baseline_test_data, hybrid_model)

    # Append baseline results to the scenario_results list
    scenario_results.loc[len(scenario_results)] = {
        'Scenario': 'Baseline',
        'Building Name': building_name,
        'Archetype': archetype,
        'Wall-U': baseline_combo[0],
        'Roof-U': baseline_combo[1],
        'Window-U': baseline_combo[2],
        'LPD': baseline_combo[3],
        'Heating Setpoint': baseline_combo[4],
        'Annual Energy Consumption': np.sum(baseline_preds),
    }

    # Predict the annual energy consumption of retrofit scenarios per building
    count = 1 # Count scenarios
    
    for retrofit_combo in retrofit_scenarios:      

        # Create sequences and test data for the given retrofit combo
        sequences_lstm, sequences_mlp, targets = create_sequences(
            building_name, joint_df, building_indexes, loaded_scaler, retrofit_combo,
        )
        test_data = get_training_data(sequences_lstm, sequences_mlp, targets)
        # Predict the annual operational energy consumption of the given retrofit combo
        combo_preds = make_predictions(test_data, hybrid_model)

        # Append the combo results to the scenario_results list
        scenario_results.loc[len(scenario_results)] = {
            'Scenario': f'S{count}',
            'Building Name': building_name,
            'Archetype': archetype,
            'Wall-U': retrofit_combo[0],
            'Roof-U': retrofit_combo[1],
            'Window-U': retrofit_combo[2],
            'LPD': retrofit_combo[3],
            'Heating Setpoint': retrofit_combo[4],
            'Annual Energy Consumption': np.sum(combo_preds),
        }

        count = count + 1

    # Convert the result_dict into a data frame
    result_df = scenario_results.reset_index(drop=True)

    # Save scenario results of the given building to a CSV file
    result_df.to_csv(os.path.join(
        retrofitting_dir, f"{file_name}.csv"), index=False, encoding='utf-8',
    )
    print(f"{building_name} is completed.")

El Sanatları is completed.
Kadın Sığınma Evi is completed.
Spor Salonu is completed.
Sinema Salonu is completed.
Fizik Tedavi is completed.
Dolunay Sitesi is completed.
Apart B is completed.
İdari Bina A Blok is completed.
Sevgi Sitesi is completed.
Apart A is completed.
Şefkat Sitesi is completed.
Çınar Sitesi is completed.
Zümrüt Sitesi is completed.
Huzur Sitesi is completed.
Umut Sitesi is completed.
Papatya Sitesi is completed.
Poliklinik is completed.
İdari Bina B Blok is completed.
Güven Sitesi is completed.


### Campus-level retrofitting

In [52]:
n_scenarios = len(retrofit_scenarios)

# Store baseline and scenario results
baseline_campus = 0
scenario_campus = np.zeros(n_scenarios,)

file_name_dict = {
    'El Sanatları': 'el_sanatlari',
    'Kadın Sığınma Evi': 'kadin_siginma_evi',
    'Spor Salonu': 'spor_salonu',
    'Sinema Salonu': 'sinema_salonu',
    'Fizik Tedavi': 'fizik_tedavi',
    'Dolunay Sitesi': 'dolunay_sitesi',
    'Apart B': 'apart_b',
    'İdari Bina A Blok': 'idari_bina_a_blok',
    'Sevgi Sitesi': 'sevgi_sitesi',
    'Apart A': 'apart_a',
    'Şefkat Sitesi': 'sefkate_sitesi',
    'Çınar Sitesi': 'cinar_sitesi',
    'Zümrüt Sitesi': 'zumrut_sitesi',
    'Huzur Sitesi': 'huzur_sitesi',
    'Umut Sitesi': 'umut_sitesi',
    'Papatya Sitesi': 'papatya_sitesi',
    'Poliklinik': 'poliklinik',
    'İdari Bina B Blok': 'idari_bina_b_blok',
    'Güven Sitesi': 'guven_sitesi',
}

# Get the building-specific retrofitting results
retrofitting_dir = r"C:\Users\said_\OneDrive\Masaüstü\Finance Project\Training\Retrofit Results"

for building_name, file_name in file_name_dict.items():
    data = pd.read_csv(os.path.join(retrofitting_dir, f"{file_name}.csv"))    

    baseline = data[data["Scenario"]=="Baseline"]["Annual Energy Consumption"]
    scenarios = data.loc[1:, "Annual Energy Consumption"]

    baseline_campus += baseline
    scenario_campus += scenarios

campus_df = pd.DataFrame(scenario_campus)
campus_df.rename(columns={scenario_campus.name: 'Energy'}, inplace=True)

# Add scenario column
campus_df["Scenario"] = [f"S{i}" for i in range(1, n_scenarios+1)]

# Add baseline row
baseline_row = {
    "Energy": baseline_campus.item(),
    "Scenario": "Baseline"
}

# Convert the baseline dictionary to a DataFrame
baseline_df = pd.DataFrame([baseline_row])

# Concatenate the new row at the top of the existing DataFrame
campus_df = pd.concat([baseline_df, campus_df], ignore_index=True)

# Calculate the change in energy based on the baseline scenario
baseline_energy = campus_df[campus_df["Scenario"]=="Baseline"]["Energy"].item()
change_list = list()
for i in range(campus_df.shape[0]):
    change = 100 * (baseline_energy - campus_df.loc[i, "Energy"]) / baseline_energy
    change_list.append(change)

# Re-order the columns of campus_df
campus_df["Change (%)"] = change_list
column_order = ["Scenario", "Energy", "Change (%)"]
campus_df = campus_df[column_order]

# Merge the retrofit results with the relevant parameter combinations
# Get a sample scenario df
sample_df = pd.read_csv(os.path.join(retrofitting_dir, "poliklinik.csv"))

# Merge the two df on the 'Scenario' column
merged_df = pd.merge(sample_df, campus_df, on='Scenario')

# Reorder the columns as specified
merged_df = merged_df[['Scenario', 'Wall-U', 'Roof-U', 'Window-U', 'LPD', 
                       'Heating Setpoint', 'Energy', 'Change (%)']]

ordered_retrofit = merged_df.sort_values(by="Change (%)", ascending=False)

# Save the ordered retrofit results to a CSV file
ordered_retrofit.to_csv("campus_retrofit_results.csv", index=False, encoding="utf-8")

In [54]:
ordered_retrofit.head(10)

Unnamed: 0,Scenario,Wall-U,Roof-U,Window-U,LPD,Heating Setpoint,Energy,Change (%)
115,S115,0.9,0.3,0.8,2.5,21,6699511.19,13.068367
112,S112,0.9,0.3,0.8,2.0,21,6717510.703,12.834808
127,S127,0.9,0.3,1.2,1.5,21,6719091.8,12.814292
130,S130,0.9,0.3,1.2,2.0,21,6730193.083,12.670244
133,S133,0.9,0.3,1.2,2.5,21,6730873.125,12.66142
109,S109,0.9,0.3,0.8,1.5,21,6742080.523,12.515995
124,S124,0.9,0.3,1.0,2.5,21,6747849.84,12.441133
121,S121,0.9,0.3,1.0,2.0,21,6767256.358,12.189318
118,S118,0.9,0.3,1.0,1.5,21,6767496.617,12.1862
131,S131,0.9,0.3,1.2,2.0,22,6792017.906,11.868016


# END