In [12]:
import pandas as pd
from sklearn.impute import SimpleImputer  # Handling missing values
from sklearn.preprocessing import StandardScaler  # Handling feature scaling
from sklearn.preprocessing import OrdinalEncoder  # ordinal encoding
## Pipelines 
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [13]:
# Load the raw energy dataset. A forward slash is used as the separator because
# it works on every platform and avoids the invalid "\e" escape sequence that a
# backslash inside a normal string literal produces.
df = pd.read_excel("data/energy.xlsx")

In [14]:
# Peek at the first rows of the freshly loaded frame (columns still carry the raw X1..X8 / Y1, Y2 headers).
df.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


Rename all columns

In [15]:
# Map the raw X*/Y* headers to descriptive names. An explicit mapping (instead
# of positional assignment to df.columns) keeps every label tied to its source
# column regardless of column order, and the cell can be re-run safely: labels
# that are no longer present are simply left alone by rename().
df = df.rename(columns={
    'X1': 'relative_compactness',
    'X2': 'surface_area',
    'X3': 'wall_area',
    'X4': 'roof_area',
    'X5': 'overall_height',
    'X6': 'orientation',
    'X7': 'glazing_area',
    'X8': 'glazing_area_distribution',
    'Y1': 'heating_load',
    'Y2': 'cooling_load',
})

In [16]:
# Remove the 'orientation' feature. `columns=` states the axis explicitly
# instead of relying on a boolean being interpreted as axis 1.
df = df.drop(columns='orientation')

In [17]:
# Confirm the descriptive column names are in place and 'orientation' is gone.
df.head()

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,glazing_area,glazing_area_distribution,heating_load,cooling_load
0,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,0.0,0,20.84,28.28


In [22]:
# Snapshot of the current column labels of df.
# NOTE: this Index still contains the targets 'heating_load' and 'cooling_load',
# so it is not a features-only list.
cols = df.columns
cols

Index(['relative_compactness', 'surface_area', 'wall_area', 'roof_area',
       'overall_height', 'glazing_area', 'glazing_area_distribution',
       'heating_load', 'cooling_load'],
      dtype='object')

In [26]:
## Predictors (X) and target (Y)
# Both load columns are removed from the predictors; only 'heating_load' is
# kept as the target, as a one-column DataFrame.
X = df.drop(columns=['heating_load', 'cooling_load'])
Y = df.loc[:, ['heating_load']]

In [27]:
## Numerical Pipeline
# Median-impute missing values, then scale the features.
num_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Apply the numeric pipeline to the predictor columns only. The column list is
# taken from X (not from df) so the target columns, which are absent from
# X_train / X_test, are never requested by the transformer.
preprocessor = ColumnTransformer([
    ('num_pipeline', num_pipeline, list(X.columns)),
])

In [31]:
## Train test split
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.30,
    random_state=30,
)
# NOTE(review): the preprocessing step below is disabled, so everything that
# follows is fitted on the raw, unscaled feature values.
#X_train=pd.DataFrame(preprocessor.fit_transform(X_train),columns=preprocessor.get_feature_names_out())
#X_test=pd.DataFrame(preprocessor.transform(X_test),columns=preprocessor.get_feature_names_out())
X_train.head()

Unnamed: 0,relative_compactness,surface_area,wall_area,roof_area,overall_height,glazing_area,glazing_area_distribution
739,0.79,637.0,343.0,147.0,7.0,0.4,5
559,0.71,710.5,269.5,220.5,3.5,0.4,1
651,0.74,686.0,245.0,220.5,3.5,0.4,3
368,0.69,735.0,294.0,220.5,3.5,0.25,2
191,0.62,808.5,367.5,220.5,3.5,0.1,3


In [32]:
## Model Training

from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [34]:
# Baseline model: a plain linear regression fitted on the training split.
regression=LinearRegression()
# y_train is a one-column DataFrame (not a Series), which is why coef_ comes back as a 2-D array.
regression.fit(X_train,y_train)

In [35]:
# Fitted weights, in the column order of X_train.
# NOTE(review): the surface_area, wall_area and roof_area weights are ~1e12 in
# magnitude and in a -1 : 1 : 2 ratio, so they cancel whenever
# surface_area == wall_area + 2 * roof_area (true for the rows displayed in this
# notebook). This looks like exact collinearity between those three features, so
# the individual weights should not be interpreted on their own — confirm on the
# full dataset.
regression.coef_

array([[-7.40137230e+01, -1.08935329e+12,  1.08935329e+12,
         2.17870659e+12,  3.97618074e+00,  1.97734286e+01,
         1.32092969e-01]])

In [36]:
# Intercept of the fitted baseline model (a single value, since there is one target column).
regression.intercept_

array([100.12740537])

In [37]:
import numpy as np
def evaluate_model(true, predicted):
    """Score regression predictions against the observed targets.

    Parameters
    ----------
    true : array-like
        Observed target values.
    predicted : array-like
        Model predictions, aligned with ``true``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2_square)``: mean absolute error, root mean squared
        error and R^2 score, in that order.
    """
    mae = mean_absolute_error(true, predicted)
    # The MSE is only needed as an intermediate for the RMSE, so compute it once.
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [38]:
## Train multiple models
# Fit each candidate regressor on the training split and report its error
# metrics on the held-out test split.

models={
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'Elasticnet':ElasticNet()
}
trained_model_list=[]   # fitted estimators, in the same order as model_list
model_list=[]           # model names
r2_list=[]              # R^2 on the test split, one entry per model

# Iterate over (name, estimator) pairs directly instead of indexing into
# freshly built key/value lists on every pass.
for model_name, model in models.items():
    model.fit(X_train,y_train)
    trained_model_list.append(model)

    # Make predictions on the held-out rows
    y_pred=model.predict(X_test)

    mae, rmse, r2_square=evaluate_model(y_test,y_pred)

    print(model_name)
    model_list.append(model_name)

    # The metrics printed under this heading are computed on X_test / y_test.
    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)

    r2_list.append(r2_square)

    print('='*35)
    print('\n')

LinearRegression
Model Training Performance
RMSE: 2.771953802439269
MAE: 1.913729662540378
R2 score 92.55572211399696


Lasso
Model Training Performance
RMSE: 4.434090037223354
MAE: 3.1237781139254635
R2 score 80.95155117699689


Ridge
Model Training Performance
RMSE: 2.8211423315806843
MAE: 2.0211594401439554
R2 score 92.28917945815891


Elasticnet
Model Training Performance
RMSE: 4.2820771983152435
MAE: 3.053173177236184
R2 score 82.23523014150412


