# DecisionTreeRegressor

In [5]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
from mlscratchnk.supervised.random_forest import RandomForestRegressor as CustomRandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import train_test_split


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Load the Boston housing regression data as (features, target) arrays.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release — confirm the pinned version.
from sklearn.datasets import load_boston
X, y = load_boston(return_X_y=True)

# Hold out 25% of the rows for validation. The split is seeded so the same rows
# land in each partition on every run; without a seed the scores printed by the
# cells that follow change on each "Restart & Run All".
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.25, random_state=1)

(X_train.shape, X_valid.shape, y_train.shape, y_valid.shape)

((379, 13), (127, 13), (379,), (127,))

In [7]:

def test_model(model):
    model.fit(X_train, y_train)
    predicted_train = model.predict(X_train)
    predicted = model.predict(X_valid)
    depth = model.get_depth() if hasattr(model, 'get_depth') else None
    print(f'train score mse {mean_squared_error(y_train, predicted_train):.3f}, valid score mse {mean_squared_error(y_valid, predicted):.3f}, depth {depth}, name {model.__class__.__name__}')

In [12]:
%%time

# Single-tree model from the local `mlscratchnk` package. It is imported under
# an alias so it does not shadow sklearn's DecisionTreeRegressor imported earlier.
from mlscratchnk.supervised.decision_tree import DecisionTreeRegressor as CustomDecisionTreeRegressor

model = CustomDecisionTreeRegressor(
    max_depth=10,
    debug=False,
    criterion='mse',
    random_state=1,
    splitter='random',
    min_impurity_decrease=0.002,
    min_samples_split=10,
    min_samples_leaf=4,
)
test_model(model)



train score mse 19.017, valid score mse 19.583, depth 8, name DecisionTreeRegressor
Wall time: 72 ms


# RandomForestRegressor

In [9]:
%%time

# Random forest from the local `mlscratchnk` package, built with the same
# depth / impurity / sample-count limits as the custom single tree above.
model = CustomRandomForestRegressor(
    n_estimators=10,
    max_features='max',
    max_depth=10,
    random_state=1,
    min_impurity_decrease=0.002,
    min_samples_split=10,
    min_samples_leaf=4,
)

test_model(model)

train score mse 9.217, valid score mse 16.467, depth 10, name RandomForestRegressor
Wall time: 719 ms


# GradientBoostingRegressor

In [31]:
%%time

# Gradient boosting model from the local `mlscratchnk` package.
# NOTE(review): the output recorded for this cell (train/valid MSE of roughly
# 526/531) is far worse than the DummyRegressor baseline recorded in the sklearn
# comparison cell (roughly 86/79). That points at a problem in the custom
# implementation or its configuration (learning_rate=1?) — TODO investigate.
from mlscratchnk.supervised.gradient_boost import GradientBoostingRegressor

model = GradientBoostingRegressor(
    n_estimators=10,
    max_depth=4,
    random_state=1,
    learning_rate=1,
)

test_model(model)

train score mse 525.872, valid score mse 530.565, depth None, name GradientBoostingRegressor
Wall time: 234 ms


## Compare with sklearn implementations

In [38]:
%%time

# Reference scores from scikit-learn, evaluated through the same `test_model`
# harness as the custom implementations.
# NOTE(review): these hyperparameters differ from the custom runs (e.g.
# max_depth and min_impurity_decrease), so the comparison is not strictly
# like-for-like — confirm whether that is intentional.
model = DecisionTreeRegressor(
    max_depth=None,
    splitter='random',
    random_state=1,
    min_impurity_decrease=0.005,
    min_samples_split=10,
    min_samples_leaf=4,
    criterion='mse',
)
test_model(model)

model = RandomForestRegressor(
    n_estimators=10,
    max_depth=6,
    random_state=1,
    min_impurity_decrease=0.005,
    min_samples_split=10,
    min_samples_leaf=4,
)
test_model(model)

# Constant-prediction baseline: any useful model should score better than this.
# Named `dummy_regressor` because the estimator is a regressor, not a classifier.
dummy_regressor = DummyRegressor()
test_model(dummy_regressor)

train score mse 12.729, valid score mse 16.685, depth 9, name DecisionTreeRegressor
train score mse 6.957, valid score mse 12.561, depth None, name RandomForestRegressor
train score mse 86.419, valid score mse 78.508, depth None, name DummyRegressor
Wall time: 34 ms


In [51]:
# Display the shape of the training feature matrix.
X_train.shape

(379, 13)