# **Building Energy Load Estimation With Linear Regression Models**

### This study estimates the heating and cooling loads of buildings using linear regression models.

1. Load the dataset.

In [None]:
import pandas as pd
import os

# List every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# Descriptive names applied to the columns of the CSV, in file order.
column_names = [
    "Relative_Compactness",
    "Surface_Area",
    "Wall_Area",
    "Roof_Area",
    "Overall_Height",
    "Orientation",
    "Glazing_Area",
    "Glazing_Area_Distribution",
    "Heating_Load",
    "Cooling_Load",
]

df = pd.read_csv("../input/eergy-efficiency-dataset/ENB2012_data.csv")
df.columns = column_names  # rename columns
df.head()

In [None]:
# Print a concise summary of the frame: column data types and memory usage.
df.info()

In [None]:
# Summary statistics for the columns of df.
df.describe()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap of the pairwise correlations between all columns of df.
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap="Blues",
    square=True,
    linecolor='w',
    ax=ax,
)

In [None]:
# Grid of pairwise relationship plots across the columns of df.
sns.pairplot(df)

2. We separate the independent variables (features) into X and the dependent variable (target) into y. We will first estimate the Heating Load.

In [None]:
# Features: every column except the two load targets.
X = df.drop(["Heating_Load","Cooling_Load"], axis=1) # independent variables (features)
# Target: heating load, selected with double brackets so it stays a one-column DataFrame.
y = df[["Heating_Load"]] # dependent variable (target)

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Preview the first rows of the target.
y.head()

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 1) # Train test split

**Multiple regression**

In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.metrics import mean_squared_error

# Fit a linear regression on the training split.
# `fit` returns the estimator itself, so `model` and `lm` are the same object.
lm = LinearRegression()
lm.fit(X_train, y_train)
model = lm

In [None]:
# Training error: RMSE of the linear model on the rows it was fitted on.
train_predictions = model.predict(X_train)
train_mse = mean_squared_error(y_train, train_predictions)
np.sqrt(train_mse)

In [None]:
# Test error: RMSE of the linear model on the held-out rows.
test_predictions = model.predict(X_test)
test_mse = mean_squared_error(y_test, test_predictions)
np.sqrt(test_mse)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation on the training split, run once.
# (Previously the same cross-validation was executed twice and the first
# result was discarded without being displayed.)
# The "neg_mean_squared_error" scorer returns negated MSE values, so flip the
# sign to obtain the per-fold MSE.
cv_mse_per_fold = -cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")

# Mean cross-validated MSE
np.mean(cv_mse_per_fold)

In [None]:
# 10-fold cross-validated RMSE: negate the scorer output to get per-fold MSE,
# average it, then take the square root.
neg_mse_scores = cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")
np.sqrt(np.mean(-neg_mse_scores))

In [None]:
# R² on the held-out test split. Scoring on the full (X, y) would mostly reuse
# rows the model was trained on and give an optimistic figure; the Lasso and
# ElasticNet sections of this notebook also report R² on the test split.
model.score(X_test, y_test)

**Ridge Regression**

In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn import model_selection
from sklearn.linear_model import RidgeCV

In [None]:
# Rebuild the feature matrix and the heating-load target from df.
X = df.drop(["Heating_Load","Cooling_Load"], axis=1) # independent variables (features)
y = df[["Heating_Load"]] # dependent variable (target)
# New split: 25% of the rows held out with random_state = 42. This differs from
# the 20% / random_state = 1 split used for the multiple regression above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42) # Train test split

In [None]:
# Ridge regression with regularization strength alpha = 5, fitted on the training split.
ridge_model = Ridge(alpha = 5)
ridge_model.fit(X_train, y_train)
ridge_model

In [None]:
# Intercept of the fitted ridge model.
ridge_model.intercept_

In [None]:
# Coefficients of the fitted ridge model, one per feature column of X.
ridge_model.coef_

In [None]:
# NOTE(review): this refits Ridge with its default alpha and overwrites the
# alpha = 5 model fitted earlier in this section. The intercept/coefficients shown
# above therefore belong to a different model than the errors reported below —
# confirm which alpha is intended.
ridge_model = Ridge().fit(X_train, y_train)

In [None]:
# Training error: RMSE of the ridge model on the training split.
y_pred = ridge_model.predict(X_train)
ridge_train_mse = mean_squared_error(y_train, y_pred)
RMSE = np.sqrt(ridge_train_mse)
RMSE

In [None]:
# 10-fold cross-validated RMSE of the ridge model on the training split.
ridge_neg_mse = cross_val_score(ridge_model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")
np.sqrt(np.mean(-ridge_neg_mse))

In [None]:
# Test error: RMSE of the ridge model on the held-out split.
y_pred = ridge_model.predict(X_test)
ridge_test_mse = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(ridge_test_mse)
RMSE

In [None]:
# R² on the held-out test split. Scoring on the full (X, y) would mostly reuse
# training rows and overstate the fit; the Lasso and ElasticNet sections of this
# notebook also report R² on the test split.
ridge_model.score(X_test, y_test)

**Lasso Regression**

In [None]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV

In [None]:
# Same X, y, test_size and random_state as the split in the Ridge section, so this
# recreates that split rather than producing a new one.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [None]:
# Lasso with default hyperparameters, fitted on the training split.
# NOTE(review): the regularization strength is not tuned; LassoCV is imported
# in this notebook but never used.
lasso_model = Lasso()
lasso_model.fit(X_train, y_train)
lasso_model

In [None]:
# Intercept of the fitted Lasso model.
lasso_model.intercept_

In [None]:
# Coefficients of the fitted Lasso model, one per feature column of X.
lasso_model.coef_

In [None]:
# Test error: RMSE of the Lasso model on the held-out split.
# y_pred is kept because the next cell reuses it for R².
y_pred = lasso_model.predict(X_test)
lasso_test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(lasso_test_mse)

In [None]:
# R² on the test split, using y_pred (the Lasso test predictions) from the previous cell.
r2_score(y_test, y_pred)

**ElasticNet Regression**

In [None]:
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV

In [None]:
# Same X, y, test_size and random_state as the Ridge and Lasso sections, so this
# recreates the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [None]:
# ElasticNet with default hyperparameters, fitted on the training split.
# NOTE(review): hyperparameters are not tuned; ElasticNetCV is imported in this
# notebook but never used.
enet_model = ElasticNet().fit(X_train, y_train)

In [None]:
# Coefficients of the fitted ElasticNet model, one per feature column of X.
enet_model.coef_

In [None]:
# Intercept of the fitted ElasticNet model.
enet_model.intercept_

In [None]:
# Test error: RMSE of the ElasticNet model on the held-out split.
# y_pred is kept because the next cell reuses it for R².
y_pred = enet_model.predict(X_test)
enet_test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(enet_test_mse)

In [None]:
# R² on the test split, using y_pred (the ElasticNet test predictions) from the previous cell.
r2_score(y_test, y_pred)

3. Now we will estimate the Cooling Load.

In [None]:
# Features: every column except the two load targets.
X = df.drop(["Heating_Load","Cooling_Load"], axis=1) # independent variables (features)
# Target: cooling load, selected with double brackets so it stays a one-column DataFrame.
y = df[["Cooling_Load"]] # dependent variable (target)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 1) # Train test split

**Multiple regression**

In [None]:
# Fit a linear regression on the cooling-load training split.
# `fit` returns the estimator itself, so `model` and `lm` are the same object.
lm = LinearRegression()
lm.fit(X_train, y_train)
model = lm

In [None]:
# Intercept of the fitted linear model.
model.intercept_

In [None]:
# Coefficients of the fitted linear model, one per feature column of X.
model.coef_

In [None]:
# Training error: RMSE of the linear model on the rows it was fitted on.
train_predictions = model.predict(X_train)
train_mse = mean_squared_error(y_train, train_predictions)
np.sqrt(train_mse)

In [None]:
# Test error: RMSE of the linear model on the held-out rows.
test_predictions = model.predict(X_test)
test_mse = mean_squared_error(y_test, test_predictions)
np.sqrt(test_mse)

In [None]:
# 10-fold cross-validation on the training split, run once.
# (Previously the same cross-validation was executed twice and the first
# result was discarded without being displayed.)
# The "neg_mean_squared_error" scorer returns negated MSE values, so flip the
# sign to obtain the per-fold MSE.
cv_mse_per_fold = -cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")

# Mean cross-validated MSE
np.mean(cv_mse_per_fold)

In [None]:
# 10-fold cross-validated RMSE: negate the scorer output to get per-fold MSE,
# average it, then take the square root.
neg_mse_scores = cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")
np.sqrt(np.mean(-neg_mse_scores))

In [None]:
# R² on the held-out test split. Scoring on the full (X, y) would mostly reuse
# rows the model was trained on and give an optimistic figure; the Lasso and
# ElasticNet sections of this notebook also report R² on the test split.
model.score(X_test, y_test)

**Ridge Regression**

In [None]:
# Re-split with 25% of the rows held out (random_state = 42), then fit a ridge
# model with regularization strength alpha = 5 on the training part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.25, random_state = 42
)
ridge_model = Ridge(alpha = 5)
ridge_model.fit(X_train, y_train)
ridge_model

In [None]:
# Intercept of the fitted ridge model.
ridge_model.intercept_

In [None]:
# Coefficients of the fitted ridge model, one per feature column of X.
ridge_model.coef_

In [None]:
# NOTE(review): this refits Ridge with its default alpha and overwrites the
# alpha = 5 model fitted earlier in this section, so the errors below describe a
# different model than the intercept/coefficients shown above — confirm which
# alpha is intended.
ridge_model = Ridge()
ridge_model.fit(X_train, y_train)

# Training error: RMSE of the ridge model on the training split.
y_pred = ridge_model.predict(X_train)
ridge_train_mse = mean_squared_error(y_train, y_pred)
RMSE = np.sqrt(ridge_train_mse)
RMSE

In [None]:
# 10-fold cross-validated RMSE of the ridge model on the training split.
ridge_neg_mse = cross_val_score(ridge_model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")
np.sqrt(np.mean(-ridge_neg_mse))

In [None]:
# Test error: RMSE of the ridge model on the held-out split.
y_pred = ridge_model.predict(X_test)
ridge_test_mse = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(ridge_test_mse)
RMSE

In [None]:
# R² on the held-out test split. Scoring on the full (X, y) would mostly reuse
# training rows and overstate the fit; the Lasso and ElasticNet sections of this
# notebook also report R² on the test split.
ridge_model.score(X_test, y_test)

**Lasso Regression**

In [None]:
# Lasso with default hyperparameters, fitted on the cooling-load training split.
# NOTE(review): the regularization strength is not tuned; LassoCV is imported
# in this notebook but never used.
lasso_model = Lasso()
lasso_model.fit(X_train, y_train)
lasso_model

In [None]:
# Intercept of the fitted Lasso model.
lasso_model.intercept_

In [None]:
# Coefficients of the fitted Lasso model, one per feature column of X.
lasso_model.coef_

In [None]:
# Test error: RMSE of the Lasso model on the held-out split.
# y_pred is kept because the next cell reuses it for R².
y_pred = lasso_model.predict(X_test)
lasso_test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(lasso_test_mse)

In [None]:
# R² on the test split, using y_pred (the Lasso test predictions) from the previous cell.
r2_score(y_test, y_pred)

**ElasticNet Regression**

In [None]:
# ElasticNet with default hyperparameters, fitted on the cooling-load training split.
# NOTE(review): hyperparameters are not tuned; ElasticNetCV is imported in this
# notebook but never used.
enet_model = ElasticNet().fit(X_train, y_train)

In [None]:
# Intercept of the fitted ElasticNet model.
enet_model.intercept_

In [None]:
# Coefficients of the fitted ElasticNet model, one per feature column of X.
enet_model.coef_

In [None]:
# Test error: RMSE of the ElasticNet model on the held-out split.
# y_pred is kept because the next cell reuses it for R².
y_pred = enet_model.predict(X_test)
enet_test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(enet_test_mse)

In [None]:
# R² on the test split, using y_pred (the ElasticNet test predictions) from the previous cell.
r2_score(y_test, y_pred)

**Conclusion**

The table below lists the test-set RMSE of each model (Heating Load, Cooling Load):

*   Multiple regression: 3.20, 3.64
*   Ridge Regression: 3.08, 3.24
*   Lasso Regression: 4.71, 4.61 
*   ElasticNet Regression: 4.56, 4.45

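Ridge Regression has the lowest test RMSE for both targets, so it performs best among the four models in this comparison. Note that the multiple regression model was evaluated on a different train/test split (test_size = 0.20, random_state = 1) than the other three models (test_size = 0.25, random_state = 42), so its figures are not strictly comparable.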


