In [91]:
# for data analysis and manipulation.
import pandas as pd

# to have access to mathematical and statistical functions.
import numpy as np

# For data visualization
import matplotlib.pyplot as plt

#Missing data visualization module for Python.
import missingno as msno

#Statistical data visualization
import seaborn as sns

#to see correlation between features and how they affect each other
from scipy.stats import spearmanr

#Split arrays or matrices into random train and test subsets.
from sklearn.model_selection import train_test_split

#A random forest regressor. ==> https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
from sklearn.ensemble import RandomForestRegressor

#SVR algorithm regressor. ==> https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR
from sklearn.svm import SVR

#Coefficient of determination
from sklearn.metrics import r2_score

#MSE
from sklearn.metrics import mean_squared_error

#Exhaustive search over specified parameter values for an estimator. ==> https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
from sklearn.model_selection import GridSearchCV

In [92]:
# the dataset is in an excel file which we have to load it
Energy = pd.read_excel('/mnt/e/Education/M.Sc at UT/Projects/Energy consumption prediction/Iterative model/forGitHub/Datasets/BuildingEnergyConsumption2016_2018.xlsx')
Energy = Energy.set_index('Time')  # for further processing, we need to set index of each smaple with Time
Energy

Unnamed: 0_level_0,EnergyCons
Time,Unnamed: 1_level_1
2016-01-01 01:00:00,23.783228
2016-01-01 02:00:00,23.783228
2016-01-01 03:00:00,23.783228
2016-01-01 04:00:00,23.783228
2016-01-01 05:00:00,23.783228
...,...
2018-12-31 19:00:00,18.602723
2018-12-31 20:00:00,18.838200
2018-12-31 21:00:00,18.602723
2018-12-31 22:00:00,18.131768


In [93]:
#The inputs of our model are meteorological data reported by KNMI (https://www.knmi.nl/home)
knmi = pd.read_excel('/mnt/e/Education/M.Sc at UT/Projects/Energy consumption prediction/Iterative model/forGitHub/Datasets/WeatherData2016_2018.xlsx')
knmi = knmi.set_index('Time') # for further processing, we need to set index of each smaple with Time
knmi

Unnamed: 0_level_0,month,HH,TD,U,Temp,RH,Q,DR,FF,FX,P
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-01 01:00:00,1,1,38,82,6.6,0.82,0,0,30,70,10224
2016-01-01 02:00:00,1,2,43,83,7.0,0.83,0,0,40,80,10228
2016-01-01 03:00:00,1,3,46,91,5.9,0.91,0,0,30,80,10232
2016-01-01 04:00:00,1,4,36,96,4.2,0.96,0,0,20,40,10237
2016-01-01 05:00:00,1,5,37,98,4.0,0.98,0,0,20,30,10240
...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00,12,19,78,93,8.7,0.93,0,0,30,60,10341
2018-12-31 20:00:00,12,20,74,92,8.5,0.92,0,0,30,50,10338
2018-12-31 21:00:00,12,21,66,89,8.2,0.89,0,0,40,60,10336
2018-12-31 22:00:00,12,22,68,94,7.6,0.94,0,0,40,70,10332


In [94]:
knmi_Updated= knmi.loc[:, ~knmi.columns.isin(["TD","RH","DR","FX"])] # ~ sign drops the columns we select
knmi_Updated['U'] = 0.01*knmi_Updated['U'] #Transfering percentage to fraction
knmi_Updated 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  knmi_Updated['U'] = 0.01*knmi_Updated['U'] #Transfering percentage to fraction


Unnamed: 0_level_0,month,HH,U,Temp,Q,FF,P
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-01 01:00:00,1,1,0.82,6.6,0,30,10224
2016-01-01 02:00:00,1,2,0.83,7.0,0,40,10228
2016-01-01 03:00:00,1,3,0.91,5.9,0,30,10232
2016-01-01 04:00:00,1,4,0.96,4.2,0,20,10237
2016-01-01 05:00:00,1,5,0.98,4.0,0,20,10240
...,...,...,...,...,...,...,...
2018-12-31 19:00:00,12,19,0.93,8.7,0,30,10341
2018-12-31 20:00:00,12,20,0.92,8.5,0,30,10338
2018-12-31 21:00:00,12,21,0.89,8.2,0,40,10336
2018-12-31 22:00:00,12,22,0.94,7.6,0,40,10332


In [95]:
#Spliting data for training and testing train(80%) , test(20%)
from sklearn.model_selection import train_test_split
#Inputs: Updated Meteorological data (when TD,DR,FX,RH eliminated)
#Output: Building energy consumption 
X_train, X_test, y_train, y_test = train_test_split(knmi_Updated , Energy, test_size = 0.2, random_state = 0)
y_train
from sklearn.preprocessing import StandardScaler #standardizes the data to a range in which the mean is equal to 0 and the standard deviation is 1. It assumes the data is normally distributed.
sc1= StandardScaler() 
X1 = sc1.fit_transform(knmi_Updated)
X1 = sc1.fit_transform(knmi_Updated)


In [96]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense


# Splitting  dataset into training (80%) and test data (20%)
X_train, X_test, y_train, y_test = train_test_split(X1, Energy, test_size=0.2, random_state=0, shuffle=False) #X1: Knmi_updated scaled
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()

# Scaling the data for RNN
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

# Reshape data for RNN (samples, time steps, features)
X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# Building the RNN model
Ghaziasgar_and_Pourfayaz_RNN = Sequential()
Ghaziasgar_and_Pourfayaz_RNN.add(SimpleRNN(units=50, activation='relu', input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
Ghaziasgar_and_Pourfayaz_RNN.add(Dense(1))

Ghaziasgar_and_Pourfayaz_RNN.compile(optimizer='adam', loss='mean_squared_error')

# Fitting the model to the training data
Ghaziasgar_and_Pourfayaz_RNN.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=32)

# Predicting on the test data
Predicted_Test_RNN_Scaled = Ghaziasgar_and_Pourfayaz_RNN.predict(X_test_scaled)
Predicted_Test_RNN = scaler_y.inverse_transform(Predicted_Test_RNN_Scaled)

# Testing the model's accuracy on the test data
print('R2 : ',r2_score(y_test, Predicted_Test_RNN))
print('MSE : ',mean_squared_error(y_test, Predicted_Test_RNN))

Epoch 1/100


  super().__init__(**kwargs)


[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.3934
Epoch 2/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1739
Epoch 3/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.1568
Epoch 4/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.1527
Epoch 5/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 947us/step - loss: 0.1492
Epoch 6/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 959us/step - loss: 0.1468
Epoch 7/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 966us/step - loss: 0.1455
Epoch 8/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.1422
Epoch 9/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.1399
Epoch 10/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 934us/ste

In [97]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense

# Splitting dataset into training (80%) and test data (20%)
X_train, X_test, y_train, y_test = train_test_split(X1, Energy , test_size=0.2, random_state=0, shuffle=False) #X1: Knmi_updated scaled
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()

# Scaling the data for ANN
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

# Building the ANN model
Ghaziasgar_and_Pourfayaz_ANN = Sequential()
Ghaziasgar_and_Pourfayaz_ANN.add(Dense(units=50, activation='relu', input_dim=X_train_scaled.shape[1]))
Ghaziasgar_and_Pourfayaz_ANN.add(Dense(units=1))  # Output layer

Ghaziasgar_and_Pourfayaz_ANN.compile(optimizer='adam', loss='mean_squared_error')

# Fitting the model to the training data
Ghaziasgar_and_Pourfayaz_ANN.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=32)

# Predicting on the test data
Predicted_Test_ANN_Scaled = Ghaziasgar_and_Pourfayaz_ANN.predict(X_test_scaled)
Predicted_Test_ANN = scaler_y.inverse_transform(Predicted_Test_ANN_Scaled)

# Testing the model's accuracy on the test data
print('R2 : ',r2_score(y_test, Predicted_Test_ANN))
print('MSE : ',mean_squared_error(y_test, Predicted_Test_ANN))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 882us/step - loss: 0.4173
Epoch 2/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 884us/step - loss: 0.1697
Epoch 3/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 868us/step - loss: 0.1536
Epoch 4/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 952us/step - loss: 0.1518
Epoch 5/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 789us/step - loss: 0.1490
Epoch 6/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 793us/step - loss: 0.1505
Epoch 7/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 867us/step - loss: 0.1453
Epoch 8/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 843us/step - loss: 0.1454
Epoch 9/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 896us/step - loss: 0.1396
Epoch 10/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[

In [98]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense


# Splitting dataset into training (80%) and test data (20%)
X_train, X_test, y_train, y_test = train_test_split(X1, Energy, test_size=0.2, random_state=0, shuffle=False) #X1: Knmi_updated scaled
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()

# Scaling the data for DNN
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

# Building the DNN model
Ghaziasgar_and_Pourfayaz_DNN = Sequential()

# Adding layers to the DNN
Ghaziasgar_and_Pourfayaz_DNN.add(Dense(units=128, activation='relu', input_dim=X_train_scaled.shape[1]))
Ghaziasgar_and_Pourfayaz_DNN.add(Dense(units=64, activation='relu'))
Ghaziasgar_and_Pourfayaz_DNN.add(Dense(units=32, activation='relu'))
Ghaziasgar_and_Pourfayaz_DNN.add(Dense(units=16, activation='relu'))
Ghaziasgar_and_Pourfayaz_DNN.add(Dense(units=1))  # Output layer for regression

Ghaziasgar_and_Pourfayaz_DNN.compile(optimizer='adam', loss='mean_squared_error')

# Fitting the model to the training data
Ghaziasgar_and_Pourfayaz_DNN.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=32)

# Predicting on the test data
Predicted_Test_DNN_Scaled = Ghaziasgar_and_Pourfayaz_DNN.predict(X_test_scaled)
Predicted_Test_DNN = scaler_y.inverse_transform(Predicted_Test_DNN_Scaled)

# Testing the model's accuracy on the test data
print('R2 : ', r2_score(y_test, Predicted_Test_DNN))
print('MSE : ', mean_squared_error(y_test, Predicted_Test_DNN))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.2728
Epoch 2/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.1492
Epoch 3/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1404
Epoch 4/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1368
Epoch 5/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1307
Epoch 6/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1278
Epoch 7/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1280
Epoch 8/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1234
Epoch 9/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.1180
Epoch 10/100
[1m658/658[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss

In [99]:
R2Score_Models = {
    'RNN': r2_score(y_test, Predicted_Test_RNN),
    'ANN' : r2_score(y_test, Predicted_Test_ANN),
    'DNN': r2_score(y_test, Predicted_Test_DNN)

}

MSE_Models = {
    'RNN': mean_squared_error(y_test, Predicted_Test_RNN),
    'ANN' : mean_squared_error(y_test, Predicted_Test_ANN),
    'DNN': mean_squared_error(y_test, Predicted_Test_DNN)

}


# Combine R2 and MSE into a single DataFrame
metrics_df = pd.DataFrame({
    'R2_Score': R2Score_Models,
    'MSE': MSE_Models
})
metrics_df

Unnamed: 0,R2_Score,MSE
RNN,0.846315,8.16026
ANN,0.846295,8.161323
DNN,0.807116,10.241596
