In [143]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [144]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
# import warnings
# warnings.filterwarnings("ignore")


In [145]:
# Read the ENB2012 spreadsheet. Columns Y1 and Y2 are the two regression
# targets; every remaining column is used as a feature.
df = pd.read_excel('ENB2012_data.xlsx')
y1, y2 = df['Y1'], df['Y2']
x = df.drop(columns=['Y1', 'Y2'])


In [146]:
# No hold-out split is made: every model below is scored by cross-validation
# on the full data set. One shuffled 5-fold splitter with a fixed seed is
# shared by all grid searches.
kfold = KFold(random_state=23, shuffle=True, n_splits=5)

Pipelines

<h1>Ridge</h1>

Y1

In [147]:
# Grid-search Ridge on Y1 over the regularisation strength and the solver.
alpha = np.linspace(0.001, 4, 20)

# 'lbfgs' is deliberately left out: Ridge only accepts it together with
# positive=True, so with the default positive=False every lbfgs candidate
# fails to fit and GridSearchCV merely records NaN scores (plus
# FitFailedWarnings) for them.
solver = ['sparse_cg', 'svd', 'cholesky', 'auto', 'lsqr', 'sag', 'saga']
params = {'alpha': alpha, 'solver': solver}

# random_state only matters for the stochastic 'sag'/'saga' solvers; fixing it
# makes their candidates reproducible across reruns.
ridge = Ridge(random_state=23)
gcv = GridSearchCV(ridge, param_grid=params, cv=kfold)

gcv.fit(x, y1)

# No `scoring` is passed, so best_score_ is the estimator's default score
# (R^2 for scikit-learn regressors), averaged over the folds.
print(gcv.best_params_)
print(gcv.best_score_)

{'alpha': 0.001, 'solver': 'svd'}
0.9143218854493969


Y2

In [148]:

# Same Ridge grid search, now against the second target (Y2).
# `ridge` and `params` are reused from the Y1 cell above.
gcv = GridSearchCV(estimator=ridge, param_grid=params, cv=kfold).fit(x, y2)

print(gcv.best_params_, gcv.best_score_, sep='\n')

{'alpha': 0.001, 'solver': 'svd'}
0.8851737878214685
<bound method BaseEstimator.get_params of GridSearchCV(cv=KFold(n_splits=5, random_state=23, shuffle=True),
             estimator=Ridge(),
             param_grid={'alpha': array([1.00000000e-03, 2.11473684e-01, 4.21947368e-01, 6.32421053e-01,
       8.42894737e-01, 1.05336842e+00, 1.26384211e+00, 1.47431579e+00,
       1.68478947e+00, 1.89526316e+00, 2.10573684e+00, 2.31621053e+00,
       2.52668421e+00, 2.73715789e+00, 2.94763158e+00, 3.15810526e+00,
       3.36857895e+00, 3.57905263e+00, 3.78952632e+00, 4.00000000e+00]),
                         'solver': ['sparse_cg', 'svd', 'cholesky', 'auto',
                                    'lsqr', 'lbfgs', 'sag', 'saga']})>


<h1>Lasso</h1>

Y1

In [149]:
# Tune the Lasso penalty strength for Y1 with the shared K-fold splitter.
lasso = Lasso()
alpha = np.linspace(start=0.001, stop=4, num=20)
params = dict(alpha=alpha)

gcv = GridSearchCV(estimator=lasso, param_grid=params, cv=kfold).fit(x, y1)

print(gcv.best_params_, gcv.best_score_, sep='\n')

{'alpha': 0.001}
0.9142338891739709
<bound method BaseEstimator.get_params of GridSearchCV(cv=KFold(n_splits=5, random_state=23, shuffle=True),
             estimator=Lasso(),
             param_grid={'alpha': array([1.00000000e-03, 2.11473684e-01, 4.21947368e-01, 6.32421053e-01,
       8.42894737e-01, 1.05336842e+00, 1.26384211e+00, 1.47431579e+00,
       1.68478947e+00, 1.89526316e+00, 2.10573684e+00, 2.31621053e+00,
       2.52668421e+00, 2.73715789e+00, 2.94763158e+00, 3.15810526e+00,
       3.36857895e+00, 3.57905263e+00, 3.78952632e+00, 4.00000000e+00])})>


In [158]:
# `lasso` is the template handed to GridSearchCV, not the refitted best model,
# so this prints the constructor defaults (e.g. alpha=1.0) rather than the
# tuned alpha.
print(lasso.get_params(deep=True))

{'alpha': 1.0, 'copy_X': True, 'fit_intercept': True, 'max_iter': 1000, 'positive': False, 'precompute': False, 'random_state': None, 'selection': 'cyclic', 'tol': 0.0001, 'warm_start': False}


Y2

In [150]:
# Repeat the Lasso search for the second target (Y2).
# `lasso` and `params` are reused from the Y1 cell above.
gcv = GridSearchCV(estimator=lasso, param_grid=params, cv=kfold).fit(x, y2)

print(gcv.best_params_, gcv.best_score_, sep='\n')

{'alpha': 0.001}
0.8850732625799566


<h1>Elastic Net</h1>

Y1

In [151]:
# Joint search over the penalty strength and the L1/L2 mixing ratio for Y1.
elastic = ElasticNet()
alpha = np.linspace(start=0.001, stop=4, num=20)
l1_rat = np.linspace(start=0.001, stop=0.999, num=20)
params = dict(alpha=alpha, l1_ratio=l1_rat)

gcv = GridSearchCV(estimator=elastic, param_grid=params, cv=kfold).fit(x, y1)

print(gcv.best_params_, gcv.best_score_, sep='\n')

{'alpha': 0.001, 'l1_ratio': 0.999}
0.9142241849778937
<bound method BaseEstimator.get_params of GridSearchCV(cv=KFold(n_splits=5, random_state=23, shuffle=True),
             estimator=ElasticNet(),
             param_grid={'alpha': array([1.00000000e-03, 2.11473684e-01, 4.21947368e-01, 6.32421053e-01,
       8.42894737e-01, 1.05336842e+00, 1.26384211e+00, 1.47431579e+00,
       1.68478947e+00, 1.89526316e+00, 2.10573684e+00, 2.31621053e+00,
       2.52668421e+00, 2.73715789e+00, 2.94763158e+00, 3.15810526e+00,
       3.36857895e+00, 3.57905263e+00, 3.78952632e+00, 4.00000000e+00]),
                         'l1_ratio': array([0.001     , 0.05352632, 0.10605263, 0.15857895, 0.21110526,
       0.26363158, 0.31615789, 0.36868421, 0.42121053, 0.47373684,
       0.52626316, 0.57878947, 0.63131579, 0.68384211, 0.73636842,
       0.78889474, 0.84142105, 0.89394737, 0.94647368, 0.999     ])})>


In [152]:
# Repeat the ElasticNet search for the second target (Y2).
# `elastic` and `params` are reused from the Y1 cell above.
gcv = GridSearchCV(estimator=elastic, param_grid=params, cv=kfold).fit(x, y2)

print(gcv.best_params_, gcv.best_score_, sep='\n')

{'alpha': 0.001, 'l1_ratio': 0.999}
0.8850607414530993


<h1>KNN</h1>

In [153]:
from sklearn.neighbors import KNeighborsRegressor

In [154]:
# Two-step pipeline: a scaler followed by a KNN regressor. The 'SCL' step is
# itself part of the search space, so the scaler given here is only a
# placeholder that the grid replaces.
knn_pipeline = Pipeline(steps=[
    ('SCL', StandardScaler()),
    ('KNN', KNeighborsRegressor()),
])

# Candidates: k = 1..10 neighbours, each with standardisation or min-max scaling.
params = {
    'KNN__n_neighbors': np.arange(1, 11),
    'SCL': [StandardScaler(), MinMaxScaler()],
}




Y1

In [160]:
# Search the KNN pipeline grid (neighbour count x scaler choice) for Y1.
gcv = GridSearchCV(estimator=knn_pipeline, param_grid=params, cv=kfold).fit(x, y1)

print(gcv.best_params_, gcv.best_score_, sep='\n')

{'KNN__n_neighbors': 1, 'SCL': StandardScaler()}
0.9584912431435416


Y2

In [156]:
# Search the KNN pipeline grid (neighbour count x scaler choice) for Y2.
gcv = GridSearchCV(estimator=knn_pipeline, param_grid=params, cv=kfold).fit(x, y2)

print(gcv.best_params_, gcv.best_score_, sep='\n')

{'KNN__n_neighbors': 4, 'SCL': StandardScaler()}
0.9341152063790255
