## sklearn.pipeline.Pipeline
* class sklearn.pipeline.Pipeline(steps, *, memory=None, verbose=False)

## sklearn.preprocessing.PolynomialFeatures
* class sklearn.preprocessing.PolynomialFeatures(degree=2, *, interaction_only=False, include_bias=True, order='C')

In [6]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import numpy as np

In [7]:
def polynomial_func(X):
    """Evaluate the cubic 1 + 2*X + X**2 + X**3.

    The expression is applied with ordinary arithmetic operators, so it works
    for plain numbers and element-wise for array inputs such as NumPy arrays.
    """
    return 1 + 2 * X + X ** 2 + X ** 3

# Toy data: the integers 0..3 arranged as a 2x2 array, with targets obtained
# by applying polynomial_func element-wise (so y is also 2x2).
X = np.arange(4).reshape(2, 2)
y = polynomial_func(X)

# Chain the degree-3 polynomial feature expansion and the linear regression
# so that a single fit() call runs both steps.
model = Pipeline(
    steps=[
        ('poly', PolynomialFeatures(degree=3)),
        ('linear', LinearRegression()),
    ]
)
model = model.fit(X, y)

# Show the coefficients learned by the regression step of the pipeline.
print(model.named_steps['linear'].coef_)

[[0.         0.02402402 0.02402402 0.04804805 0.07207207 0.0960961
  0.0960961  0.14414414 0.21621622 0.31231231]
 [0.         0.05705706 0.05705706 0.11411411 0.17117117 0.22822823
  0.22822823 0.34234234 0.51351351 0.74174174]]


In [8]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats

In [9]:
# Load the Boston CSV, using its first column as the row index.
boston_df = pd.read_csv(
    './datasets/Boston.csv',
    index_col=0,
)
# A bare expression as the last line of a notebook cell renders the frame.
boston_df

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
503,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
504,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
505,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [10]:
from sklearn.model_selection import train_test_split

# Features are every column except 'medv'; 'medv' itself is the target.
x_data = boston_df.drop('medv', axis=1)
y_target = boston_df['medv']

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    y_target,
    test_size=0.3,
    random_state=156,
)

In [None]:
# Expand the training features to degree-2 polynomial terms (no bias column).
# NOTE: despite its name, p_model holds the transformed feature matrix here,
# not a fitted model.
p_model = PolynomialFeatures(
    degree=2,
    include_bias=False,
).fit_transform(X_train, y_train)

# Compare the expanded shape with the original training-set shape.
print(p_model.shape, X_train.shape)

In [11]:
# Degree-2 polynomial expansion (without a bias column) feeding a linear
# regression, wrapped in one pipeline.
p_model = Pipeline(
    steps=[
        ('poly', PolynomialFeatures(degree=2, include_bias=False)),
        ('linear', LinearRegression()),
    ]
)

In [13]:
from sklearn.metrics import mean_squared_error, r2_score

# Fit the pipeline on the training split and predict the held-out rows.
y_preds = p_model.fit(X_train, y_train).predict(X_test)

# Test-set error: MSE and its square root (RMSE).
mse = mean_squared_error(y_test, y_preds)
rmse = np.sqrt(mse)

# Report MSE, RMSE and R^2, in that order.
print(mse, rmse, r2_score(y_test, y_preds))

15.555752313043662 3.9440781322184355 0.7816647162556334


In [14]:
# Same pipeline layout as before, but with a degree-3 polynomial expansion
# (still without a bias column).
p_model = Pipeline(
    steps=[
        ('poly', PolynomialFeatures(degree=3, include_bias=False)),
        ('linear', LinearRegression()),
    ]
)

In [15]:
from sklearn.metrics import mean_squared_error, r2_score

# Refit the current pipeline on the training split and predict the test rows.
y_preds = p_model.fit(X_train, y_train).predict(X_test)

# Test-set error: MSE and its square root (RMSE).
mse = mean_squared_error(y_test, y_preds)
rmse = np.sqrt(mse)

# Report MSE, RMSE and R^2, in that order. In the recorded run the R^2 is
# strongly negative, i.e. this fit does far worse on the test split than the
# degree-2 pipeline did.
print(mse, rmse, r2_score(y_test, y_preds))

79625.59379921248 282.18007335602647 -1116.5979319809026
