In [25]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.metrics import log_loss,accuracy_score
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split,GridSearchCV,KFold
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression,LogisticRegression,ElasticNet,Lasso,Ridge

In [2]:
data = pd.read_csv("Boston.csv")

In [3]:
X = data.drop("medv",axis =1)
y = data['medv']

In [4]:
kfold = KFold(n_splits=5,shuffle=True,random_state=23)

In [5]:

# Train,Test Split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=23)


### For Linear Regression

In [6]:
lr= LinearRegression()

In [7]:
model =  lr.fit(X_train,y_train)

In [8]:
y_pred  = model.predict(X_test)

### Elastic Net

In [45]:
elnet = ElasticNet()

In [46]:
l1_ratio = [0.1,0.25,0.5,0.8,0.9]
aplha=[0.1,0.5,1,2,2.5,3]

In [47]:
kfold = KFold(n_splits=5,shuffle=True,random_state=23)

In [48]:
scaler=MinMaxScaler()
pipe=Pipeline([('SCL',scaler),('EN',elnet)])
pipe.get_params()

{'memory': None,
 'steps': [('SCL', MinMaxScaler()), ('EN', ElasticNet())],
 'verbose': False,
 'SCL': MinMaxScaler(),
 'EN': ElasticNet(),
 'SCL__clip': False,
 'SCL__copy': True,
 'SCL__feature_range': (0, 1),
 'EN__alpha': 1.0,
 'EN__copy_X': True,
 'EN__fit_intercept': True,
 'EN__l1_ratio': 0.5,
 'EN__max_iter': 1000,
 'EN__positive': False,
 'EN__precompute': False,
 'EN__random_state': None,
 'EN__selection': 'cyclic',
 'EN__tol': 0.0001,
 'EN__warm_start': False}

In [49]:
params={'EN__l1_ratio':np.linspace(0.01,0.999,10),'EN__alpha':np.linspace(0.01,0.999,5),'SCL':[StandardScaler(),MinMaxScaler()]}

In [51]:
gcv = GridSearchCV(pipe,param_grid=params,cv=kfold,scoring="r2")
gcv.fit(X_train,y_train)

In [52]:
print(gcv.best_params_)
print(gcv.best_score_)

{'EN__alpha': 0.01, 'EN__l1_ratio': 0.7792222222222221, 'SCL': MinMaxScaler()}
0.7186758883805405


## For Lasso Regression and Ridge

In [15]:
lasso = Lasso()

In [16]:
params={'alpha':np.linspace(0.01,0.999,5)}

In [17]:
gcv = GridSearchCV(lasso,param_grid=params,cv=kfold,scoring="r2")
gcv.fit(X_train,y_train)

In [18]:
print(gcv.best_params_)
print(gcv.best_score_)

{'alpha': 0.01}
0.7132155522652753


In [19]:
ridge= Ridge()

In [20]:
gcv = GridSearchCV(ridge,param_grid=params,cv=kfold,scoring="r2")
gcv.fit(X_train,y_train)
print(gcv.best_params_)
print(gcv.best_score_)

{'alpha': 0.25725}
0.7137186068023665


For KNN

In [21]:
knr=KNeighborsRegressor()

In [22]:
params={'n_neighbors':np.arange(1,11)}

In [23]:
gcv = GridSearchCV(knr,param_grid=params,cv=kfold,scoring="r2")
gcv.fit(X,y)

In [24]:
print(gcv.best_params_)
print(gcv.best_score_)

{'n_neighbors': 3}
0.544002500199357


For Support Vector Regressor

In [26]:
svr = SVR()

In [32]:
scaler=MinMaxScaler()
pipe=Pipeline([('SCL',scaler),('SVR',svr)])
pipe.get_params()



{'memory': None,
 'steps': [('SCL', MinMaxScaler()), ('SVR', SVR())],
 'verbose': False,
 'SCL': MinMaxScaler(),
 'SVR': SVR(),
 'SCL__clip': False,
 'SCL__copy': True,
 'SCL__feature_range': (0, 1),
 'SVR__C': 1.0,
 'SVR__cache_size': 200,
 'SVR__coef0': 0.0,
 'SVR__degree': 3,
 'SVR__epsilon': 0.1,
 'SVR__gamma': 'scale',
 'SVR__kernel': 'rbf',
 'SVR__max_iter': -1,
 'SVR__shrinking': True,
 'SVR__tol': 0.001,
 'SVR__verbose': False}

In [35]:
params={'SVR__C':np.linspace(0.001,5,10),'SVR__epsilon':np.linspace(0.001,5,10),
        'SVR__kernel':['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
       'SCL':[StandardScaler(),MinMaxScaler()]}
gcv= GridSearchCV(pipe,param_grid=params,cv=kfold,scoring='r2',n_jobs=3)

In [36]:
gcv.fit(X_train,y_train)

1000 fits failed out of a total of 5000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
800 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py", line 405, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py", line 217, in fit
    raise ValueError(
ValueError: Precomputed matrix must be a square matrix. Input is a 283x13 matrix.

-------------------------------------------------------------------------

In [37]:
print(gcv.best_params_)
print(gcv.best_score_)

{'SCL': MinMaxScaler(), 'SVR__C': 5.0, 'SVR__epsilon': 1.6673333333333331, 'SVR__kernel': 'poly'}
0.8399609344542774


### For Decision Tree Regressor

In [38]:
from sklearn.tree import DecisionTreeRegressor,plot_tree

In [39]:
dtr = DecisionTreeRegressor()

In [40]:
scaler=MinMaxScaler()
pipe=Pipeline([('SCL',scaler),('DTR',dtr)])
pipe.get_params()

{'memory': None,
 'steps': [('SCL', MinMaxScaler()), ('DTR', DecisionTreeRegressor())],
 'verbose': False,
 'SCL': MinMaxScaler(),
 'DTR': DecisionTreeRegressor(),
 'SCL__clip': False,
 'SCL__copy': True,
 'SCL__feature_range': (0, 1),
 'DTR__ccp_alpha': 0.0,
 'DTR__criterion': 'squared_error',
 'DTR__max_depth': None,
 'DTR__max_features': None,
 'DTR__max_leaf_nodes': None,
 'DTR__min_impurity_decrease': 0.0,
 'DTR__min_samples_leaf': 1,
 'DTR__min_samples_split': 2,
 'DTR__min_weight_fraction_leaf': 0.0,
 'DTR__random_state': None,
 'DTR__splitter': 'best'}

In [41]:
params={'DTR__max_depth':[2,3,4,5,6,7,8,9,10,None],
       'DTR__min_samples_split':[2,5,6,7,8,9,10],
       'DTR__min_samples_leaf':[1,3,5,7,10,15],
       'SCL':[StandardScaler(),MinMaxScaler()]}
gcv= GridSearchCV(pipe,param_grid=params,cv=kfold,scoring='r2',n_jobs=3)

In [42]:
gcv.fit(X_train,y_train)

In [43]:
print(gcv.best_params_)
print(gcv.best_score_)

{'DTR__max_depth': 5, 'DTR__min_samples_leaf': 1, 'DTR__min_samples_split': 2, 'SCL': MinMaxScaler()}
0.7586618080290138
