In [7]:
%matplotlib inline
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import graphviz

from sklearn.datasets import load_boston
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, ShuffleSplit
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.pipeline import make_pipeline

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import seaborn as sns

In [8]:
data_boston = load_boston()
print(data_boston.DESCR)
print("Shape:", data_boston.data.shape)

X_train, X_test, y_train, y_test = train_test_split(data_boston.data, data_boston.target, test_size=0.20)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [9]:
def PolynomialRegression(degree=2, linear_model=LinearRegression(), **kwargs):
    return make_pipeline(PolynomialFeatures(degree), linear_model(**kwargs))

def fit_model(X, y):
    # Create cross-validation sets from the training data
    cv_sets = ShuffleSplit(n_splits=10, test_size = 0.20, random_state = 0)
 
    #Creating a dictionary for the parameter degree with a range from 1 to 10 and fit_intercept of true or false
    params = {'polynomialfeatures__degree': [1,2,3,4]}

    best_models = []
    for m in [LinearRegression, Lasso, Ridge]:
        grid = GridSearchCV(PolynomialRegression(linear_model=m), param_grid = params, 
                         cv=cv_sets, 
                         scoring='r2',
                         return_train_score=True)
        grid = grid.fit(X, y)
        cv_results = pd.DataFrame(grid.cv_results_)
        print(m)
        print(cv_results[['params', 'mean_fit_time', 'mean_score_time', 'mean_test_score']].sort_values(by='mean_test_score', ascending=False))
        best_models.append(grid.best_estimator_)
    
    return best_models

#Fitting the training data using grid search
models = fit_model(X_train, y_train)

<class 'sklearn.linear_model.base.LinearRegression'>
                              params  mean_fit_time  mean_score_time  \
1  {'polynomialfeatures__degree': 2}       0.007115         0.002431   
0  {'polynomialfeatures__degree': 1}       0.002324         0.000949   
3  {'polynomialfeatures__degree': 4}       0.135308         0.048760   
2  {'polynomialfeatures__degree': 3}       0.055854         0.010824   

   mean_test_score  
1         0.732345  
0         0.717943  
3     -1441.482409  
2     -1597.457245  
<class 'sklearn.linear_model.coordinate_descent.Lasso'>
                              params  mean_fit_time  mean_score_time  \
1  {'polynomialfeatures__degree': 2}       0.037231         0.003431   
0  {'polynomialfeatures__degree': 1}       0.001051         0.000791   
2  {'polynomialfeatures__degree': 3}       0.199968         0.011112   
3  {'polynomialfeatures__degree': 4}       0.881153         0.033844   

   mean_test_score  
1         0.828389  
0         0.669705  
2

Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number1.686867e-19
  overwrite_a=False)
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number1.813703e-19
  overwrite_a=False)


<class 'sklearn.linear_model.ridge.Ridge'>
                              params  mean_fit_time  mean_score_time  \
1  {'polynomialfeatures__degree': 2}       0.003511         0.002150   
0  {'polynomialfeatures__degree': 1}       0.001305         0.000481   
2  {'polynomialfeatures__degree': 3}       0.040669         0.010342   
3  {'polynomialfeatures__degree': 4}       0.095353         0.044213   

   mean_test_score  
1         0.821751  
0         0.714930  
2        -5.665710  
3      -180.263430  


In [14]:
features_transformed = pd.DataFrame(data=data_boston.data, columns=data_boston.feature_names)

# Applying scaling using MinMaxScaler
scaler = MinMaxScaler()
features_transformed[data_boston.feature_names] = scaler.fit_transform(features_transformed)
print(features_transformed.head())

       CRIM    ZN     INDUS  CHAS       NOX        RM       AGE       DIS  \
0  0.000000  0.18  0.067815   0.0  0.314815  0.577505  0.641607  0.269203   
1  0.000236  0.00  0.242302   0.0  0.172840  0.547998  0.782698  0.348962   
2  0.000236  0.00  0.242302   0.0  0.172840  0.694386  0.599382  0.348962   
3  0.000293  0.00  0.063050   0.0  0.150206  0.658555  0.441813  0.448545   
4  0.000705  0.00  0.063050   0.0  0.150206  0.687105  0.528321  0.448545   

        RAD       TAX   PTRATIO         B     LSTAT  
0  0.000000  0.208015  0.287234  1.000000  0.089680  
1  0.043478  0.104962  0.553191  1.000000  0.204470  
2  0.043478  0.104962  0.553191  0.989737  0.063466  
3  0.086957  0.066794  0.648936  0.994276  0.033389  
4  0.086957  0.066794  0.648936  1.000000  0.099338  


In [15]:
X_train_scale, X_test_scale, y_train_scale, y_test_scale = train_test_split(features_transformed, 
                    data_boston.target, test_size = 0.2, random_state = 0)
models = fit_model(X_train_scale, y_train_scale)

<class 'sklearn.linear_model.base.LinearRegression'>
                              params  mean_fit_time  mean_score_time  \
1  {'polynomialfeatures__degree': 2}       0.006915         0.002533   
0  {'polynomialfeatures__degree': 1}       0.004193         0.001001   
3  {'polynomialfeatures__degree': 4}       0.137997         0.049908   
2  {'polynomialfeatures__degree': 3}       0.055528         0.013584   

   mean_test_score  
1         0.751177  
0         0.725487  
3       -10.466120  
2       -22.506637  
<class 'sklearn.linear_model.coordinate_descent.Lasso'>
                              params  mean_fit_time  mean_score_time  \
0  {'polynomialfeatures__degree': 1}       0.002511         0.000873   
2  {'polynomialfeatures__degree': 3}       0.021671         0.010345   
3  {'polynomialfeatures__degree': 4}       0.106638         0.041869   
1  {'polynomialfeatures__degree': 2}       0.005462         0.002647   

   mean_test_score  
0         0.292238  
2         0.290059  
3

In [29]:
print(models[0].named_steps['linearregression'].coef_.mean())
print(models[1].named_steps['lasso'].coef_)
print(models[2].named_steps['ridge'].coef_.mean())

25.452603718075174
[ 0.         -0.          0.         -0.          0.         -0.
  0.         -0.          0.         -0.         -2.42932061 -0.
  0.         -7.8146962 ]
-0.01674592237683825


In [12]:
features_transformed = pd.DataFrame(data=data_boston.data, columns=data_boston.feature_names)

# Applying scaling using MinMaxScaler to the log transformed data set and storing it in a new data frame called 'features_log_minmax_transform'
scaler = StandardScaler()
features_transformed[data_boston.feature_names] = scaler.fit_transform(features_transformed)
print(features_transformed.head())

X_train_scale, X_test_scale, y_train_scale, y_test_scale = train_test_split(features_transformed, 
                    data_boston.target, test_size = 0.2, random_state = 0)
models = fit_model(X_train_scale, y_train_scale)

       CRIM        ZN     INDUS      CHAS       NOX        RM       AGE  \
0 -0.417713  0.284830 -1.287909 -0.272599 -0.144217  0.413672 -0.120013   
1 -0.415269 -0.487722 -0.593381 -0.272599 -0.740262  0.194274  0.367166   
2 -0.415272 -0.487722 -0.593381 -0.272599 -0.740262  1.282714 -0.265812   
3 -0.414680 -0.487722 -1.306878 -0.272599 -0.835284  1.016303 -0.809889   
4 -0.410409 -0.487722 -1.306878 -0.272599 -0.835284  1.228577 -0.511180   

        DIS       RAD       TAX   PTRATIO         B     LSTAT  
0  0.140214 -0.982843 -0.666608 -1.459000  0.441052 -1.075562  
1  0.557160 -0.867883 -0.987329 -0.303094  0.441052 -0.492439  
2  0.557160 -0.867883 -0.987329 -0.303094  0.396427 -1.208727  
3  1.077737 -0.752922 -1.106115  0.113032  0.416163 -1.361517  
4  1.077737 -0.752922 -1.106115  0.113032  0.441052 -1.026501  
<class 'sklearn.linear_model.base.LinearRegression'>
                              params  mean_fit_time  mean_score_time  \
1  {'polynomialfeatures__degree': 2}    