In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge, Lasso, ElasticNet

In [2]:
df = pd.read_csv("C:/Users/Administrator.DAI-PC2/Desktop/ML/Day 4/Boston.csv")

y = df['medv']
X = df.drop('medv', axis = 1)

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24)

poly = PolynomialFeatures(degree =1)
lr = LinearRegression()

kfold = KFold(n_splits = 5, shuffle = True, random_state=24)


pipe = Pipeline([('POLY', poly), ('LR', lr)])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

result = cross_val_score(pipe, X, y, cv = kfold)
print(result)
print(result.mean())

[0.6504844  0.76694574 0.74848435 0.66869369 0.73519355]
0.713960346536948


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24)

poly2 = PolynomialFeatures(degree =2)
lr = LinearRegression()

kfold = KFold(n_splits = 5, shuffle = True, random_state=24)


pipe = Pipeline([('POLY', poly2), ('LR', lr)])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

result = cross_val_score(pipe, X, y, cv = kfold)
print(result)
print(result.mean())

[0.72181042 0.43783701 0.79916382 0.84629991 0.76581213]
0.7141846588789649


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24)

poly3 = PolynomialFeatures(degree =3)
lr = LinearRegression()

kfold = KFold(n_splits = 5, shuffle = True, random_state=24)


pipe = Pipeline([('POLY', poly3), ('LR', lr)])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

result = cross_val_score(pipe, X, y, cv = kfold)
print(result)
print(result.mean())

[-3367.72799869 -2479.85313027  -523.41723967 -7401.54215514
 -3297.54166456]
-3414.0164376678767


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24)

poly4 = PolynomialFeatures(degree = 4)
lr = LinearRegression()

kfold = KFold(n_splits = 5, shuffle = True, random_state=24)


pipe = Pipeline([('POLY', poly4), ('LR', lr)])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

result = cross_val_score(pipe, X, y, cv = kfold)
print(result)
print(result.mean())

[-30401.90128513   -249.30433724   -495.79988836    -46.40580219
   -362.55443682]
-6311.193149949859


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=24)

poly5 = PolynomialFeatures(degree = 5)
lr = LinearRegression()

kfold = KFold(n_splits = 5, shuffle = True, random_state=24)


pipe = Pipeline([('POLY', poly5), ('LR', lr)])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

result = cross_val_score(pipe, X, y, cv = kfold)
print(result)
print(result.mean())

[-26477.97291233   -725.4143353    -365.23305994   -237.5466713
   -888.06631627]
-5738.846659029643


In [8]:
degrees = [1, 2, 3, 4, 5]
scores = []
for i in degrees :
    polyf = PolynomialFeatures(degree = i)
    pipe = Pipeline([('POLY', polyf), ('LR', lr)])
    result = cross_val_score(pipe, X, y, cv = kfold)
    scores.append(result.mean())

i_max = np.argmax(scores)
print("Best degree : ", degrees[i_max])
print("Best score : ", scores[i_max])

Best degree :  2
Best score :  0.7141846588789649


In [9]:
# using grid search

print(pipe.get_params())
params = {'POLY__degree': [1, 2, 3, 4, 5]}
gcv = GridSearchCV(pipe, param_grid = params, cv = kfold)
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)

{'memory': None, 'steps': [('POLY', PolynomialFeatures(degree=5)), ('LR', LinearRegression())], 'verbose': False, 'POLY': PolynomialFeatures(degree=5), 'LR': LinearRegression(), 'POLY__degree': 5, 'POLY__include_bias': True, 'POLY__interaction_only': False, 'POLY__order': 'C', 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__n_jobs': None, 'LR__positive': False}
0.7141846588789649
{'POLY__degree': 2}


In [10]:
#ridge
ridge = Ridge()
poly = PolynomialFeatures()
pipe = Pipeline([('POLY', poly), ('LR', ridge)])
print(pipe.get_params())
params = {'POLY__degree': [1, 2, 3, 4, 5],
          'LR__alpha':np.linspace(0.001, 5,10)}
gcv = GridSearchCV(pipe, param_grid = params, cv = kfold)
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)

{'memory': None, 'steps': [('POLY', PolynomialFeatures()), ('LR', Ridge())], 'verbose': False, 'POLY': PolynomialFeatures(), 'LR': Ridge(), 'POLY__degree': 2, 'POLY__include_bias': True, 'POLY__interaction_only': False, 'POLY__order': 'C', 'LR__alpha': 1.0, 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__max_iter': None, 'LR__positive': False, 'LR__random_state': None, 'LR__solver': 'auto', 'LR__tol': 0.0001}


  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T
  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


0.8023916129595982
{'LR__alpha': 5.0, 'POLY__degree': 2}




In [11]:
print(gcv.best_score_)
print(gcv.best_params_)

0.8023916129595982
{'LR__alpha': 5.0, 'POLY__degree': 2}


In [12]:
#lasso
lasso = Lasso()
poly = PolynomialFeatures()
pipe = Pipeline([('POLY', poly), ('LR', lasso)])
print(pipe.get_params())
params = {'POLY__degree': [1, 2, 3, 4, 5],
          'LR__alpha':np.linspace(0.001, 5,10)}
gcv = GridSearchCV(pipe, param_grid = params, cv = kfold)
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)

best_model = gcv_lass.best_estimator_
print(best_model.named_steps.LR.coef_)
print(best_model.named_steps.LR.inercept_)

{'memory': None, 'steps': [('POLY', PolynomialFeatures()), ('LR', Lasso())], 'verbose': False, 'POLY': PolynomialFeatures(), 'LR': Lasso(), 'POLY__degree': 2, 'POLY__include_bias': True, 'POLY__interaction_only': False, 'POLY__order': 'C', 'LR__alpha': 1.0, 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__max_iter': 1000, 'LR__positive': False, 'LR__precompute': False, 'LR__random_state': None, 'LR__selection': 'cyclic', 'LR__tol': 0.0001, 'LR__warm_start': False}


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

0.830434967961206
{'LR__alpha': 0.5564444444444444, 'POLY__degree': 2}


NameError: name 'gcv_lass' is not defined

In [13]:
print(gcv.best_score_)
print(gcv.best_params_)

0.830434967961206
{'LR__alpha': 0.5564444444444444, 'POLY__degree': 2}


In [15]:
#elasticnet
elastic = ElasticNet()
poly = PolynomialFeatures()
pipe = Pipeline([('POLY', poly), ('LR', elastic)])
print(pipe.get_params())
params = {'POLY__degree': [1, 2, 3],
          'LR__alpha':np.linspace(0.001, 5,10),
         'LR__l1_ratio':np.linspace(0, 1,5)}
gcv = GridSearchCV(pipe, param_grid = params, cv = kfold)
gcv.fit(X, y)
print(gcv.best_score_)
print(gcv.best_params_)



{'memory': None, 'steps': [('POLY', PolynomialFeatures()), ('LR', ElasticNet())], 'verbose': False, 'POLY': PolynomialFeatures(), 'LR': ElasticNet(), 'POLY__degree': 2, 'POLY__include_bias': True, 'POLY__interaction_only': False, 'POLY__order': 'C', 'LR__alpha': 1.0, 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__l1_ratio': 0.5, 'LR__max_iter': 1000, 'LR__positive': False, 'LR__precompute': False, 'LR__random_state': None, 'LR__selection': 'cyclic', 'LR__tol': 0.0001, 'LR__warm_start': False}


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

0.830434967961206
{'LR__alpha': 0.5564444444444444, 'LR__l1_ratio': 1.0, 'POLY__degree': 2}


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
