In [156]:
import pandas as pd
import matplotlib.pyplot as plt

In [157]:
from sklearn.datasets import fetch_openml

boston = fetch_openml(name='boston', version=1, as_frame=True)

boston_df = boston.frame
boston_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [158]:
from sklearn.model_selection import train_test_split

X = boston_df.drop("MEDV", axis=1)
y = boston_df['MEDV']

In [159]:
X.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33


In [160]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [161]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=42)

In [162]:
regressor.fit(X_train, y_train)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [163]:
y_pred = regressor.predict(X_test)

In [164]:
y_pred

array([21.6, 27.9,  8.1, 24.1, 15.2, 21.6, 19.5, 17.8, 21.4, 18.9, 20.5,
       19.5,  7.5, 21.2, 16.2, 22. , 20.5, 10.5, 43.1, 14.6, 24.7, 24.4,
       13.6, 20.6, 18.4, 14.9, 21.7, 13.4, 20.5, 22.7, 20.3, 22.9, 36.5,
       15.3, 17.3, 13.8, 31.2, 18.7, 21.4, 24.1, 23.7, 36.2, 35.2, 26.4,
       22. ,  8.5, 15.6, 24.1, 23.2, 24.5, 19.1, 35.1, 15.6, 29.4, 43.1,
       20.6, 17.8, 37.3, 22. , 19.4, 27.5, 24.8, 30.1, 18.2, 30.5, 16.5,
        9.5, 22.9, 32.5, 17.3, 22.6, 22. ,  8.4, 18.6, 20.6,  6.3, 19.8,
       35.2, 10.2, 14.4, 22. , 13.1, 17.5, 10.5, 20.3, 25.1, 15.2, 23. ,
       22.1, 16.6, 22.6,  7.5, 19.8, 17.5, 22.9, 19.8, 50. , 16.3, 11.8,
       16.3, 19. , 21.2, 14.6, 20.4, 23.7, 11.8, 20.4, 24.7, 19. , 22.9,
        8.4, 16.3, 22.2, 22.2, 31.7, 18.4, 46. , 14.8, 16.1, 23.7, 16.2,
       25. ,  8.3, 19.1, 24.7, 22.9, 23.3, 37.2, 17.5, 50. , 15.2, 24.7,
       18.2, 27.1, 14.6, 21.7, 20.1, 24.8, 24.5, 14.8, 21.4, 23.5, 17.5,
       16.5,  5.6, 18.9, 13.8, 14.6, 13.6, 44.8, 14

In [165]:
from sklearn.metrics import r2_score
score = r2_score(y_pred, y_test)

In [166]:
score

0.71087489081941

### `Hyperparameter Tunning`

In [167]:
parameter = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'splitter': ['best', 'random'],
    'max_depth': [1,2,3,4,5,6,7,8,10,11,12],
    'max_features': ['sqrt', 'log2', None]
}

regressor = DecisionTreeRegressor()

In [None]:
from sklearn.model_selection import GridSearchCV
regressorcv = GridSearchCV(
    regressor, param_grid=parameter, cv=10, scoring='neg_mean_squared_error'
)

In [169]:
regressorcv.fit(X_train, y_train)

0,1,2
,estimator,DecisionTreeRegressor()
,param_grid,"{'criterion': ['squared_error', 'friedman_mse', ...], 'max_depth': [1, 2, ...], 'max_features': ['sqrt', 'log2', ...], 'splitter': ['best', 'random']}"
,scoring,'neg_mean_squared_error'
,n_jobs,
,refit,True
,cv,20
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'poisson'
,splitter,'best'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [170]:
regressorcv.best_params_

{'criterion': 'poisson',
 'max_depth': 8,
 'max_features': None,
 'splitter': 'best'}

In [171]:
y_pred = regressorcv.predict(X_test)

In [172]:
r2_score(y_pred, y_test)

0.7676282569148734