In [24]:
import pandas as pd
from sklearn.impute import SimpleImputer  # Handling missing values
from sklearn.preprocessing import StandardScaler  # Handling feature  scaling
from sklearn.preprocessing import OrdinalEncoder  # ordinal encodind
## Pipelines 
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import numpy as np 
from sklearn.datasets import make_classification

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix

In [26]:
# Load the energy workbook into a DataFrame.
# A forward slash is used as the separator: "\e" inside a normal string literal
# is an invalid escape sequence (it triggers a warning on recent Python
# versions), and a backslash is only a path separator on Windows.
df = pd.read_excel("data/energy.xlsx")

In [27]:
# Preview the first rows of the freshly loaded frame (original X1..X8 / Y1, Y2 headers).
df.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


Rename all columns

In [28]:
# Replace the positional headers with descriptive names.
# The assignment is positional, so the order below must match the column
# order of the loaded sheet (eight inputs followed by the two load targets).
df.columns = [
    'relative_compactness',
    'surface_area',
    'wall_area',
    'roof_area',
    'overall_height',
    'orientation',
    'glazing_area',
    'glazing_area_distribution',
    'heating_load',
    'cooling_load',
]

In [29]:
# Remove the 'orientation' column from the frame.
# `columns=` names the axis explicitly instead of relying on a boolean being
# interpreted as axis 1.
df = df.drop(columns='orientation')

In [43]:
# Preview the frame after renaming the columns and dropping 'orientation'.
df.head()

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,glazing_area,glazing_area_distribution,heating_load,cooling_load
0,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,0.0,0,20.84,28.28


In [44]:
# Count missing values per column.
df.isnull().sum()

relative_compactness         0
surface_area                 0
wall_area                    0
roof_area                    0
overall_height               0
glazing_area                 0
glazing_area_distribution    0
heating_load                 0
cooling_load                 0
dtype: int64

In [45]:
# Keep a handle on the current column labels and display them.
# NOTE(review): this Index still contains the two target columns
# ('heating_load', 'cooling_load'), not only the input features.
cols = df.columns
cols

Index(['relative_compactness', 'surface_area', 'wall_area', 'roof_area',
       'overall_height', 'glazing_area', 'glazing_area_distribution',
       'heating_load', 'cooling_load'],
      dtype='object')

In [46]:
# Preview the first rows again.
# NOTE(review): no visible cell modifies `df` since the previous preview, so
# this looks redundant - confirm before removing.
df.head()

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,glazing_area,glazing_area_distribution,heating_load,cooling_load
0,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,0.0,0,20.84,28.28


In [47]:
## Independent and dependent feature
# Inputs: every column except the two load targets.
X = df.drop(columns=['heating_load', 'cooling_load'])
# Targets: one Series per load, each modelled separately further down.
y_heating, y_cooling = df['heating_load'], df['cooling_load']

In [48]:
## Numerical Pipeline
# Median imputation followed by standardisation for the numeric inputs.
num_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Route only the feature columns through the numeric pipeline.
# `cols` also lists 'heating_load' and 'cooling_load', which are not columns
# of X, so selecting by `cols` would make fitting the transformer on the
# feature matrix fail; the feature names are therefore taken from X itself.
preprocessor = ColumnTransformer([
    ('num_pipeline', num_pipeline, list(X.columns)),
])

In [49]:
## Train test split

# Split the features and both targets in a single call (70/30, fixed seed).
# One call produces one X_train/X_test pair whose rows line up with the
# heating and the cooling targets, instead of two calls that overwrite
# X_train/X_test and stay aligned only while both seeds are kept identical.
(
    X_train, X_test,
    y_heating_train, y_heating_test,
    y_cooling_train, y_cooling_test,
) = train_test_split(X, y_heating, y_cooling, test_size=0.30, random_state=123)

In [50]:
## Model Training

from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [51]:
## adjusted r2-score for heating_load target
# NOTE: a function with this same name is defined again in a later cell; once
# both cells have run, the later definition is the one in effect.
def adj_r2_score(y_heating_true, y_heating_pred, n_features):
    """Return the adjusted R^2 of predictions against the true values.

    Computed as ``1 - (1 - R^2) * (n - 1) / (n - p - 1)`` where ``n`` is
    ``len(y_heating_true)`` and ``p`` is ``n_features``.

    Parameters
    ----------
    y_heating_true : sized sequence of true target values.
    y_heating_pred : predicted values, passed to ``r2_score`` with the truths.
    n_features : int, number of predictors used by the model.

    Returns
    -------
    The adjusted R^2 as a fraction (not a percentage).

    Raises
    ------
    ValueError
        If ``n - p - 1`` is not positive, where the adjustment is undefined
        (the unguarded formula would divide by zero or flip its sign).
    """
    n_samples = len(y_heating_true)
    residual_dof = n_samples - n_features - 1
    if residual_dof <= 0:
        raise ValueError(
            "adjusted R2 needs more samples than n_features + 1 "
            f"(got {n_samples} samples and {n_features} features)"
        )
    r2 = r2_score(y_heating_true, y_heating_pred)
    adj_r2 = 1 - ((1 - r2) * (n_samples - 1)) / residual_dof
    return adj_r2


In [52]:
## adjusted r2-score for cooling_load target
# NOTE: this redefines the adj_r2_score from the previous cell; the formula is
# target-agnostic, so this definition serves both targets.
def adj_r2_score(y_cooling_true, y_cooling_pred, n_features):
    """Return the adjusted R^2 of predictions against the true values.

    Computed as ``1 - (1 - R^2) * (n - 1) / (n - p - 1)`` where ``n`` is
    ``len(y_cooling_true)`` and ``p`` is ``n_features``.

    Parameters
    ----------
    y_cooling_true : sized sequence of true target values.
    y_cooling_pred : predicted values, passed to ``r2_score`` with the truths.
    n_features : int, number of predictors used by the model.

    Returns
    -------
    The adjusted R^2 as a fraction (not a percentage).

    Raises
    ------
    ValueError
        If ``n - p - 1`` is not positive, where the adjustment is undefined
        (the unguarded formula would divide by zero or flip its sign).
    """
    n_samples = len(y_cooling_true)
    residual_dof = n_samples - n_features - 1
    if residual_dof <= 0:
        raise ValueError(
            "adjusted R2 needs more samples than n_features + 1 "
            f"(got {n_samples} samples and {n_features} features)"
        )
    r2 = r2_score(y_cooling_true, y_cooling_pred)
    adj_r2 = 1 - ((1 - r2) * (n_samples - 1)) / residual_dof
    return adj_r2


In [53]:
import numpy as np
def evaluate_model(true, predicted,n_features):
    """Compute regression metrics for one set of predictions.

    Parameters
    ----------
    true : true target values.
    predicted : model predictions for the same rows.
    n_features : int, number of predictors, forwarded to ``adj_r2_score``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2_square, adj_r2_square)``; both R^2 values are
        fractions, not percentages.
    """
    mae = mean_absolute_error(true, predicted)
    # RMSE is the square root of the MSE; the MSE is computed once here rather
    # than also being stored in a separate variable that was never used.
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2_square = r2_score(true, predicted)
    adj_r2_square = adj_r2_score(true, predicted, n_features)
    return mae, rmse, r2_square, adj_r2_square

In [54]:
## Train multiple models for heating_load target

# Candidate regressors. The tree-based models are given a fixed seed (the same
# value used for the train/test split) so repeated runs report the same scores.
models={
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'Elasticnet':ElasticNet(),
    'Svm_regressor':SVR(),
    'Decesion_tree_regressor':DecisionTreeRegressor(random_state=123),
    'Random_forest_regressor':RandomForestRegressor(random_state=123)

}
trained_model_list=[]
model_list=[]
r2_list=[]

# NOTE(review): the models are fitted on the raw feature values; the
# `preprocessor` (imputer + scaler) built in an earlier cell is not applied here.
for model_name, model in models.items():
    model.fit(X_train,y_heating_train)
    # Keep the fitted estimator so it can be reused after the loop.
    trained_model_list.append(model)

    #Make Predictions
    y_heating_pred=model.predict(X_test)

    # Metrics are computed on the held-out test split.
    mae, rmse, r2_square,adj_r2_square=evaluate_model(y_heating_test,y_heating_pred,X_train.shape[1])

    print(model_name)
    model_list.append(model_name)

    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)
    print("Ajusted R2 score",adj_r2_square*100)

    r2_list.append(r2_square)

    print('='*35)
    print('\n')

LinearRegression
Model Training Performance
RMSE: 2.8214186373954955
MAE: 2.046789255399969
R2 score 91.64719409058195
Ajusted R2 score 91.38499838938947


Lasso
Model Training Performance
RMSE: 4.222283408505576
MAE: 2.982467197247822
R2 score 81.29351931920483
Ajusted R2 score 80.70632037406776


Ridge
Model Training Performance
RMSE: 2.848725913532617
MAE: 2.093494035760908
R2 score 91.48472533986445
Ajusted R2 score 91.21742972273015


Elasticnet
Model Training Performance
RMSE: 4.117376486911318
MAE: 2.9484473234510253
R2 score 82.2115343908232
Ajusted R2 score 81.65315206228402


Svm_regressor
Model Training Performance
RMSE: 5.140256680895547
MAE: 3.6791968870935303
R2 score 72.27529455480027
Ajusted R2 score 71.40501232109445


Decesion_tree_regressor
Model Training Performance
RMSE: 0.5267876164842842
MAE: 0.34449350649350663
R2 score 99.7088153730555
Ajusted R2 score 99.6996750484429


Random_forest_regressor
Model Training Performance
RMSE: 0.4978399178415182
MAE: 0.33371789

In [55]:
## Train multiple models for cooling_load target

# Candidate regressors. The tree-based models are given a fixed seed (the same
# value used for the train/test split) so repeated runs report the same scores.
models={
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'Elasticnet':ElasticNet(),
    'Svm_regressor':SVR(),
    'Decesion_tree_regressor':DecisionTreeRegressor(random_state=123),
    'Random_forest_regressor':RandomForestRegressor(random_state=123)

}
trained_model_list=[]
model_list=[]
r2_list=[]

# NOTE(review): the models are fitted on the raw feature values; the
# `preprocessor` (imputer + scaler) built in an earlier cell is not applied here.
for model_name, model in models.items():
    model.fit(X_train,y_cooling_train)
    # Keep the fitted estimator so it can be reused after the loop.
    trained_model_list.append(model)

    #Make Predictions
    y_cooling_pred=model.predict(X_test)

    # Metrics are computed on the held-out test split.
    mae, rmse, r2_square,adj_r2_square=evaluate_model(y_cooling_test,y_cooling_pred,X_train.shape[1])

    print(model_name)
    model_list.append(model_name)

    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)
    print("Ajusted R2 score",adj_r2_square*100)

    r2_list.append(r2_square)

    print('='*35)
    print('\n')

LinearRegression
Model Training Performance
RMSE: 3.1547084570941233
MAE: 2.21418507934707
R2 score 88.34581909838882
Ajusted R2 score 87.97999279206023


Lasso
Model Training Performance
RMSE: 4.047163424026593
MAE: 2.8739354645221376
R2 score 80.81928563026779
Ajusted R2 score 80.21720042583672


Ridge
Model Training Performance
RMSE: 3.1590763126232617
MAE: 2.2558269441319165
R2 score 88.31352513612481
Ajusted R2 score 87.94668511797626


Elasticnet
Model Training Performance
RMSE: 3.969257437034068
MAE: 2.840167569357663
R2 score 81.55061773445465
Ajusted R2 score 80.9714891431595


Svm_regressor
Model Training Performance
RMSE: 4.814376159628769
MAE: 3.4672288647480665
R2 score 72.85790697230168
Ajusted R2 score 72.00591302075958


Decesion_tree_regressor
Model Training Performance
RMSE: 1.954653104522324
MAE: 1.1367388167388168
R2 score 95.52593095157361
Ajusted R2 score 95.38548932225082


Random_forest_regressor
Model Training Performance
RMSE: 1.8577113125910671
MAE: 1.0974885