In [None]:
import numpy as np
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so
# this import (and the notebook as written) requires scikit-learn < 1.2.
from sklearn.datasets import load_boston

# Prefer an installed `cart` package. If it cannot be imported, add the parent
# directory to sys.path and import the same names from there.
try:
    from cart.utils import square_loss, rmse, display_seconds, train_test_split
    from cart.grid_search_cross_validation import grid_search_cross_validation
    from cart.regression_tree import Regression_Tree
except ImportError:
    # Catch ImportError only: a bare `except:` would also swallow
    # KeyboardInterrupt and real errors raised while importing cart, and
    # misreport them as "module not installed".
    print(
        'Warning: cart module not installed, importing functions / classes by changing working directory.\n'
        'To install the cart module, please follow the instructions in the README.'
    )
    import sys
    sys.path.append('../')
    from cart.utils import square_loss, rmse, display_seconds, train_test_split
    from cart.grid_search_cross_validation import grid_search_cross_validation
    from cart.regression_tree import Regression_Tree

## Load dataset and split into training and test sets

In [None]:
# Fetch the Boston housing data as a (features, target) pair instead of a Bunch.
X, y = load_boston(return_X_y=True)

# Split with train_size=0.8 (presumably 80% train / 20% test -- confirm against
# cart.utils.train_test_split).
# NOTE(review): no random seed is set in this notebook; if train_test_split
# shuffles, the split and every downstream metric will differ between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
)

## Determine optimal model using k-fold cross-validation

In [None]:
# Candidate tree depths to search over: the integers 2 through 11 inclusive.
parameter_dict = {'max_depth': np.arange(2, 12)}

# Grid search over the candidate depths with k=5 cross-validation scored by
# 'square_loss', fitted on the training split only. The object returned by
# .fit() is what the evaluation cell queries for .depth and .predict().
cv_tree = grid_search_cross_validation(
    model_class=Regression_Tree,
    parameter_dict=parameter_dict,
    k=5,
    loss_metric='square_loss',
).fit(x=X_train, y=y_train)

## Evaluate model using MSE and RMSE

In [None]:
# Report the depth selected by the grid search.
print(f'Optimal tree depth: {cv_tree.depth}\n')

# Square loss on the training and held-out splits (prediction first, target second).
print('MSE:')
train_mse = square_loss(cv_tree.predict(X_train), y_train)
print(f"Training error (MSE): {train_mse:.2f}")
test_mse = square_loss(cv_tree.predict(X_test), y_test)
print(f"Testing error (MSE):  {test_mse:.2f}")

# The same comparison using the rmse helper.
print('\nRMSE:')
train_rmse = rmse(cv_tree.predict(X_train), y_train)
print(f"Training error (RMSE): {train_rmse:.2f}")
test_rmse = rmse(cv_tree.predict(X_test), y_test)
print(f"Testing error (RMSE): {test_rmse:.2f}")