Tasks

\Cases\Energy Efficiency

Build separately following models on y1 and y2 with hyper-parameter tuning
* Ridge
* Lasso
* ElasticNet
* K-NN regression ( with scaling)


In [33]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV,KFold
from sklearn.linear_model import Ridge,Lasso,ElasticNet
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.pipeline import Pipeline

In [2]:
energy = pd.read_excel("ENB2012_data.xlsx",sheet_name='sheet1')
energy.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


In [3]:
X=energy.drop(['Y1','Y2'],axis=1)
y1=energy['Y1']
y2=energy['Y2']

## 1. Ridge Regression

### For Y1

In [9]:
ridge1=Ridge()

In [12]:
kfold=KFold(n_splits=5,shuffle=True,random_state=23)
params={'alpha':np.linspace(0.1,5,10)}
gcv=GridSearchCV(ridge1,param_grid=params,cv=kfold,scoring='r2')
gcv.fit(X,y1)

In [13]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'alpha': 0.1}
best score : 0.9127175741410495


### For Y2

In [14]:
ridge2=Ridge()

In [15]:
kfold=KFold(n_splits=5,shuffle=True,random_state=23)
params={'alpha':np.linspace(0.1,5,10)}
gcv=GridSearchCV(ridge1,param_grid=params,cv=kfold,scoring='r2')
gcv.fit(X,y2)

In [16]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'alpha': 0.1}
best score : 0.8829982639541427


## 2. Lasso Regression

### For Y1

In [17]:
lasso1=Lasso()

In [18]:
kfold=KFold(n_splits=5,shuffle=True,random_state=23)
params={'alpha':np.linspace(0.1,5,10)}
gcv=GridSearchCV(lasso1,param_grid=params,cv=kfold,scoring='r2')
gcv.fit(X,y1)

In [19]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'alpha': 0.1}
best score : 0.9035164622984215


### For Y2

In [20]:
lasso2=Lasso()

In [21]:
kfold=KFold(n_splits=5,shuffle=True,random_state=23)
params={'alpha':np.linspace(0.1,5,10)}
gcv=GridSearchCV(lasso1,param_grid=params,cv=kfold,scoring='r2')
gcv.fit(X,y2)

In [22]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'alpha': 0.1}
best score : 0.871730702676469


## 3. ElasticNet Regression

### For Y1

In [23]:
elasticnet1=ElasticNet()

In [25]:
kfold=KFold(n_splits=5,shuffle=True,random_state=23)
params={'alpha':np.linspace(0.1,5,10),'l1_ratio':np.linspace(0.1,5,10)}
gcv=GridSearchCV(elasticnet1,param_grid=params,cv=kfold,scoring='r2')
gcv.fit(X,y1)

400 fits failed out of a total of 500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
50 fits failed with the following error:
Traceback (most recent call last):
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/utils/_param_validation.py", line 96, in validate_parameter_constraints


In [26]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'alpha': 0.1, 'l1_ratio': 0.6444444444444445}
best score : 0.870893408699378


### For Y2

In [27]:
elasticnet2=ElasticNet()

In [28]:
kfold=KFold(n_splits=5,shuffle=True,random_state=23)
params={'alpha':np.linspace(0.1,5,10),'l1_ratio':np.linspace(0.1,5,10)}
gcv=GridSearchCV(elasticnet2,param_grid=params,cv=kfold,scoring='r2')
gcv.fit(X,y2)

400 fits failed out of a total of 500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
50 fits failed with the following error:
Traceback (most recent call last):
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/utils/_param_validation.py", line 96, in validate_parameter_constraints


In [29]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'alpha': 0.1, 'l1_ratio': 0.6444444444444445}
best score : 0.8520230667122014


## 4. KNN Regression

### For Y1

In [31]:
kfold = KFold(n_splits=5,shuffle=True,random_state=23)
knr=KNeighborsRegressor()

In [34]:
scaler = StandardScaler()
pipe=Pipeline([("SCL",scaler),('KNR',knr)])

In [35]:
pipe.get_params()

{'memory': None,
 'steps': [('SCL', StandardScaler()), ('KNR', KNeighborsRegressor())],
 'verbose': False,
 'SCL': StandardScaler(),
 'KNR': KNeighborsRegressor(),
 'SCL__copy': True,
 'SCL__with_mean': True,
 'SCL__with_std': True,
 'KNR__algorithm': 'auto',
 'KNR__leaf_size': 30,
 'KNR__metric': 'minkowski',
 'KNR__metric_params': None,
 'KNR__n_jobs': None,
 'KNR__n_neighbors': 5,
 'KNR__p': 2,
 'KNR__weights': 'uniform'}

In [36]:
params={'KNR__n_neighbors':np.arange(1,11),'SCL':[StandardScaler(),MinMaxScaler()]}

In [37]:
gcv = GridSearchCV(pipe,param_grid=params,cv=kfold,scoring="r2")
gcv.fit(X,y1)

In [38]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'KNR__n_neighbors': 1, 'SCL': StandardScaler()}
best score : 0.9584912431435416


### For Y2

In [39]:
kfold = KFold(n_splits=5,shuffle=True,random_state=23)
knr=KNeighborsRegressor()

In [40]:
scaler = StandardScaler()
pipe=Pipeline([("SCL",scaler),('KNR',knr)])

In [41]:
params={'KNR__n_neighbors':np.arange(1,11),'SCL':[StandardScaler(),MinMaxScaler()]}

In [42]:
gcv = GridSearchCV(pipe,param_grid=params,cv=kfold,scoring="r2")
gcv.fit(X,y2)

In [43]:
print("best parameter :",gcv.best_params_)
print("best score :",gcv.best_score_)

best parameter : {'KNR__n_neighbors': 4, 'SCL': StandardScaler()}
best score : 0.9341152063790255
