
# Decision Tree Regression


In [104]:
# IMPORT LIBRARIES
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor 
from sklearn.model_selection import GridSearchCV

from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score

## E.2(a) - Listing data without Feature Selection

In [158]:
# IMPORT LISTING DATA
listings1 = pd.read_csv('a.csv')
listings1.head()

Unnamed: 0.1,Unnamed: 0,id,host_response_rate,host_is_superhost,host_total_listings_count,host_has_profile_pic,host_identity_verified,latitude,longitude,accommodates,...,price,minimum_nights,maximum_nights,has_availability,number_of_reviews,instant_bookable,ptype_code,rtype_code,ngtype_code,nbtype_code
0,0,49091,100,0,2.0,1.0,1.0,1.44255,103.7958,1,...,4.406719,180,360,1,1,0,-0.016218,0.849882,1.326759,2.181841
1,1,50646,0,0,1.0,1.0,1.0,1.33235,103.78521,2,...,4.382027,90,730,1,18,0,-0.016218,0.849882,-0.434121,-1.296388
2,2,56334,100,0,2.0,1.0,1.0,1.44246,103.79667,1,...,4.219508,6,14,1,20,0,-0.016218,0.849882,1.326759,2.181841
3,3,71609,100,0,8.0,1.0,1.0,1.34541,103.95712,6,...,5.187386,90,1125,1,20,1,1.242553,0.849882,0.446319,1.684951
4,4,71896,100,0,8.0,1.0,1.0,1.34567,103.95963,3,...,4.553877,90,1125,1,24,1,0.75841,0.849882,0.446319,1.684951


In [159]:
# DROP UNNECCESSARY COLUMNS
listings1 = listings1.drop(columns='Unnamed: 0')
listings1 = listings1.drop(columns='id')
listings1.head()

Unnamed: 0,host_response_rate,host_is_superhost,host_total_listings_count,host_has_profile_pic,host_identity_verified,latitude,longitude,accommodates,bathrooms_text,bedrooms,...,price,minimum_nights,maximum_nights,has_availability,number_of_reviews,instant_bookable,ptype_code,rtype_code,ngtype_code,nbtype_code
0,100,0,2.0,1.0,1.0,1.44255,103.7958,1,1.0,1.0,...,4.406719,180,360,1,1,0,-0.016218,0.849882,1.326759,2.181841
1,0,0,1.0,1.0,1.0,1.33235,103.78521,2,1.0,1.0,...,4.382027,90,730,1,18,0,-0.016218,0.849882,-0.434121,-1.296388
2,100,0,2.0,1.0,1.0,1.44246,103.79667,1,1.0,1.0,...,4.219508,6,14,1,20,0,-0.016218,0.849882,1.326759,2.181841
3,100,0,8.0,1.0,1.0,1.34541,103.95712,6,1.0,2.0,...,5.187386,90,1125,1,20,1,1.242553,0.849882,0.446319,1.684951
4,100,0,8.0,1.0,1.0,1.34567,103.95963,3,0.5,1.0,...,4.553877,90,1125,1,24,1,0.75841,0.849882,0.446319,1.684951


In [160]:
X = listings1.drop('price',axis=1)
y = listings1['price']

In [161]:
# Prepare the train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state = 42)

In [162]:
# Create the parameter grid for GridSearchCV
dt_param_grid = {
    'max_features': ['auto', 'sqrt', 'log2'], #The number of features to consider when looking for the best split
    'min_samples_split': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15], #The minimum number of samples required to split an internal node
    'min_samples_leaf':[1,2,3,4,5,6,7,8,9,10,11], #The minimum number of samples required to be at a leaf node
    'max_depth':[80, 90, 100, 110, 120] #The maximum depth of the tree
}

In [163]:
# Instantiate a Random Forest Regressor
dt_reg = DecisionTreeRegressor(random_state = 42)

In [164]:
# Setup grid search
dt_grid = GridSearchCV(estimator = dt_reg, param_grid = dt_param_grid, cv=5,  n_jobs=-1, verbose=2)

In [165]:
# Fit the grid search model on training dataset
dt_grid.fit(X_train, y_train)

Fitting 5 folds for each of 2475 candidates, totalling 12375 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 308 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done 1760 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done 3440 tasks      | elapsed:   35.7s
[Parallel(n_jobs=-1)]: Done 5138 tasks      | elapsed:   54.6s
[Parallel(n_jobs=-1)]: Done 7328 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 9998 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 12375 out of 12375 | elapsed:  2.1min finished


GridSearchCV(cv=5, estimator=DecisionTreeRegressor(random_state=42), n_jobs=-1,
             param_grid={'max_depth': [80, 90, 100, 110, 120],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                              11],
                         'min_samples_split': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                               11, 12, 13, 14, 15]},
             verbose=2)

In [166]:
# Get best RF estimator based on best parameters, and use RF regressor to predict on test set
dt_best = dt_grid.best_estimator_
y_pred = dt_best.predict(X_test)

In [167]:
scores = cross_val_score(dt_best, X_train, y_train, cv=5)

In [168]:
# Calculate performance metrics
dt_dict1 = {'Model':'Decision Tree Regressor (a)',
          'R^2':metrics.r2_score(y_test, y_pred),
          'MAE':metrics.mean_absolute_error(y_test, y_pred),
          'MSE':metrics.mean_squared_error(y_test, y_pred),
          'RMSE':np.sqrt(metrics.mean_squared_error(y_test, y_pred)),
          'CVS':scores.mean()
          }


In [184]:
# Display model performance metrics
dt_reg_metrics1 = pd.DataFrame.from_dict(dt_dict1, orient = 'index').T
dt_reg_metrics1

Unnamed: 0,Model,R^2,MAE,MSE,RMSE,CVS
0,Decision Tree Regressor (a),0.611215,0.344715,0.265451,0.515219,0.607903


## E.2(b) - Listing data with Feature Selection

In [170]:
# IMPORT LISTING DATA
listings2 = pd.read_csv('b.csv')
listings2.head()

Unnamed: 0.1,Unnamed: 0,price,host_response_rate,host_is_superhost,latitude,accommodates,bathrooms_text,bedrooms,beds,minimum_nights,has_availability,number_of_reviews,instant_bookable,ptype_code,rtype_code,ngtype_code,nbtype_code
0,0,4.406719,100,0,1.44255,1,1.0,1.0,1.0,180,1,1,0,-0.016218,0.849882,1.326759,2.181841
1,1,4.382027,0,0,1.33235,2,1.0,1.0,1.0,90,1,18,0,-0.016218,0.849882,-0.434121,-1.296388
2,2,4.219508,100,0,1.44246,1,1.0,1.0,1.0,6,1,20,0,-0.016218,0.849882,1.326759,2.181841
3,3,5.187386,100,0,1.34541,6,1.0,2.0,3.0,90,1,20,1,1.242553,0.849882,0.446319,1.684951
4,4,4.553877,100,0,1.34567,3,0.5,1.0,1.0,90,1,24,1,0.75841,0.849882,0.446319,1.684951


In [171]:
# DROP UNNECCESSARY COLUMNS
listings2 = listings2.drop(columns='Unnamed: 0')
listings2.head()

Unnamed: 0,price,host_response_rate,host_is_superhost,latitude,accommodates,bathrooms_text,bedrooms,beds,minimum_nights,has_availability,number_of_reviews,instant_bookable,ptype_code,rtype_code,ngtype_code,nbtype_code
0,4.406719,100,0,1.44255,1,1.0,1.0,1.0,180,1,1,0,-0.016218,0.849882,1.326759,2.181841
1,4.382027,0,0,1.33235,2,1.0,1.0,1.0,90,1,18,0,-0.016218,0.849882,-0.434121,-1.296388
2,4.219508,100,0,1.44246,1,1.0,1.0,1.0,6,1,20,0,-0.016218,0.849882,1.326759,2.181841
3,5.187386,100,0,1.34541,6,1.0,2.0,3.0,90,1,20,1,1.242553,0.849882,0.446319,1.684951
4,4.553877,100,0,1.34567,3,0.5,1.0,1.0,90,1,24,1,0.75841,0.849882,0.446319,1.684951


In [172]:
X = listings2.drop('price',axis=1)
y = listings2['price']

In [173]:
# Prepare the train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state = 42)

In [174]:
# Create the parameter grid for GridSearchCV
dt_param_grid = {
    'max_features': ['auto', 'sqrt', 'log2'], #The number of features to consider when looking for the best split
    'min_samples_split': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15], #The minimum number of samples required to split an internal node
    'min_samples_leaf':[1,2,3,4,5,6,7,8,9,10,11], #The minimum number of samples required to be at a leaf node
    'max_depth':[80, 90, 100, 110, 120] #The maximum depth of the tree
}

In [175]:
# Instantiate a Random Forest Regressor
dt_reg = DecisionTreeRegressor(random_state = 42)

In [176]:
# Setup grid search
dt_grid = GridSearchCV(estimator = dt_reg, param_grid = dt_param_grid, cv=5,  n_jobs=-1, verbose=2)

In [177]:
# Fit the grid search model on training dataset
dt_grid.fit(X_train, y_train)

Fitting 5 folds for each of 2475 candidates, totalling 12375 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 358 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 2052 tasks      | elapsed:   15.7s
[Parallel(n_jobs=-1)]: Done 4894 tasks      | elapsed:   31.3s
[Parallel(n_jobs=-1)]: Done 8856 tasks      | elapsed:   51.2s
[Parallel(n_jobs=-1)]: Done 12375 out of 12375 | elapsed:  1.1min finished


GridSearchCV(cv=5, estimator=DecisionTreeRegressor(random_state=42), n_jobs=-1,
             param_grid={'max_depth': [80, 90, 100, 110, 120],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                              11],
                         'min_samples_split': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                               11, 12, 13, 14, 15]},
             verbose=2)

In [178]:
# Get best RF estimator based on best parameters, and use RF regressor to predict on test set
dt_best = dt_grid.best_estimator_
y_pred = dt_best.predict(X_test)

In [179]:
scores = cross_val_score(dt_best, X_train, y_train, cv=5)

In [180]:
# Calculate performance metrics
dt_dict2 = {'Model':'Decision Tree Regressor (b)',
          'R^2':metrics.r2_score(y_test, y_pred),
          'MAE':metrics.mean_absolute_error(y_test, y_pred),
          'MSE':metrics.mean_squared_error(y_test, y_pred),
          'RMSE':np.sqrt(metrics.mean_squared_error(y_test, y_pred)),
          'CVS':scores.mean()
          }


In [187]:
# Display model performance metrics
dt_reg_metrics2 = pd.DataFrame.from_dict(dt_dict2, orient = 'index').T
dt_reg_metrics2

Unnamed: 0,Model,R^2,MAE,MSE,RMSE,CVS
0,Decision Tree Regressor (b),0.579724,0.339768,0.286952,0.535679,0.605513



## E.2(c) -  Listing & Review data without Feature Selection


In [132]:
# IMPORT LISTING DATA
listings3 = pd.read_csv('c.csv')
listings3.head()

Unnamed: 0.1,Unnamed: 0,host_response_rate,host_is_superhost,host_total_listings_count,host_has_profile_pic,host_identity_verified,latitude,longitude,accommodates,bathrooms_text,...,minimum_nights,maximum_nights,has_availability,number_of_reviews,instant_bookable,ptype_code,rtype_code,ngtype_code,nbtype_code,avgcomp
0,0,100.0,0.0,2.0,1.0,1.0,1.44255,103.7958,1.0,1.0,...,180.0,360.0,1.0,1.0,0.0,-0.016218,0.849882,1.326759,2.181841,0.9615
1,1,0.0,0.0,1.0,1.0,1.0,1.33235,103.78521,2.0,1.0,...,90.0,730.0,1.0,18.0,0.0,-0.016218,0.849882,-0.434121,-1.296388,0.825117
2,2,100.0,0.0,2.0,1.0,1.0,1.44246,103.79667,1.0,1.0,...,6.0,14.0,1.0,20.0,0.0,-0.016218,0.849882,1.326759,2.181841,0.89919
3,3,100.0,0.0,8.0,1.0,1.0,1.34541,103.95712,6.0,1.0,...,90.0,1125.0,1.0,20.0,1.0,1.242553,0.849882,0.446319,1.684951,0.577285
4,4,100.0,0.0,8.0,1.0,1.0,1.34567,103.95963,3.0,0.5,...,90.0,1125.0,1.0,24.0,1.0,0.75841,0.849882,0.446319,1.684951,0.7472


In [133]:
# DROP UNNECCESSARY COLUMNS
listings3 = listings3.drop(columns='Unnamed: 0')
listings3.head()

Unnamed: 0,host_response_rate,host_is_superhost,host_total_listings_count,host_has_profile_pic,host_identity_verified,latitude,longitude,accommodates,bathrooms_text,bedrooms,...,minimum_nights,maximum_nights,has_availability,number_of_reviews,instant_bookable,ptype_code,rtype_code,ngtype_code,nbtype_code,avgcomp
0,100.0,0.0,2.0,1.0,1.0,1.44255,103.7958,1.0,1.0,1.0,...,180.0,360.0,1.0,1.0,0.0,-0.016218,0.849882,1.326759,2.181841,0.9615
1,0.0,0.0,1.0,1.0,1.0,1.33235,103.78521,2.0,1.0,1.0,...,90.0,730.0,1.0,18.0,0.0,-0.016218,0.849882,-0.434121,-1.296388,0.825117
2,100.0,0.0,2.0,1.0,1.0,1.44246,103.79667,1.0,1.0,1.0,...,6.0,14.0,1.0,20.0,0.0,-0.016218,0.849882,1.326759,2.181841,0.89919
3,100.0,0.0,8.0,1.0,1.0,1.34541,103.95712,6.0,1.0,2.0,...,90.0,1125.0,1.0,20.0,1.0,1.242553,0.849882,0.446319,1.684951,0.577285
4,100.0,0.0,8.0,1.0,1.0,1.34567,103.95963,3.0,0.5,1.0,...,90.0,1125.0,1.0,24.0,1.0,0.75841,0.849882,0.446319,1.684951,0.7472


In [134]:
X = listings3.drop('price',axis=1)
y = listings3['price']

In [135]:
# Prepare the train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state = 42)

In [136]:
# Create the parameter grid for GridSearchCV
dt_param_grid = {
    'max_features': ['auto', 'sqrt', 'log2'], #The number of features to consider when looking for the best split
    'min_samples_split': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15], #The minimum number of samples required to split an internal node
    'min_samples_leaf':[1,2,3,4,5,6,7,8,9,10,11], #The minimum number of samples required to be at a leaf node
    'max_depth':[80, 90, 100, 110, 120] #The maximum depth of the tree
}

In [137]:
# Instantiate a Random Forest Regressor
dt_reg = DecisionTreeRegressor(random_state = 42)

In [138]:
# Setup grid search
dt_grid = GridSearchCV(estimator = dt_reg, param_grid = dt_param_grid, cv=5,  n_jobs=-1, verbose=2)

In [139]:
# Fit the grid search model on training dataset
dt_grid.fit(X_train, y_train)

Fitting 5 folds for each of 2475 candidates, totalling 12375 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 608 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done 3512 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 8384 tasks      | elapsed:   40.6s
[Parallel(n_jobs=-1)]: Done 12375 out of 12375 | elapsed:  1.0min finished


GridSearchCV(cv=5, estimator=DecisionTreeRegressor(random_state=42), n_jobs=-1,
             param_grid={'max_depth': [80, 90, 100, 110, 120],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                              11],
                         'min_samples_split': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                               11, 12, 13, 14, 15]},
             verbose=2)

In [140]:
# Get best RF estimator based on best parameters, and use RF regressor to predict on test set
dt_best = dt_grid.best_estimator_
y_pred = dt_best.predict(X_test)

In [141]:
scores = cross_val_score(dt_best, X_train, y_train, cv=5)

In [144]:
# Calculate performance metrics
dt_dict3 = {'Model':'Decision Tree Regressor (c)',
          'R^2':metrics.r2_score(y_test, y_pred),
          'MAE':metrics.mean_absolute_error(y_test, y_pred),
          'MSE':metrics.mean_squared_error(y_test, y_pred),
          'RMSE':np.sqrt(metrics.mean_squared_error(y_test, y_pred)),
          'CVS':scores.mean()
          }


In [190]:
# Display model performance metrics
dt_reg_metrics3 = pd.DataFrame.from_dict(dt_dict3, orient = 'index').T
dt_reg_metrics3

Unnamed: 0,Model,R^2,MAE,MSE,RMSE,CVS
0,Decision Tree Regressor (c),0.667613,0.31,0.22861,0.478132,0.60818



## E.2(d) -  Listing & Review data with Feature Selection


In [146]:
# IMPORT LISTING DATA
listings4 = pd.read_csv('d.csv')
listings4.head()

Unnamed: 0.1,Unnamed: 0,price,host_response_rate,host_is_superhost,host_total_listings_count,latitude,accommodates,bathrooms_text,bedrooms,beds,minimum_nights,number_of_reviews,instant_bookable,ptype_code,rtype_code,ngtype_code,nbtype_code,avgcomp
0,0,4.406719,100.0,0.0,2.0,1.44255,1.0,1.0,1.0,1.0,180.0,1.0,0.0,-0.016218,0.849882,1.326759,2.181841,0.9615
1,1,4.382027,0.0,0.0,1.0,1.33235,2.0,1.0,1.0,1.0,90.0,18.0,0.0,-0.016218,0.849882,-0.434121,-1.296388,0.825117
2,2,4.219508,100.0,0.0,2.0,1.44246,1.0,1.0,1.0,1.0,6.0,20.0,0.0,-0.016218,0.849882,1.326759,2.181841,0.89919
3,3,5.187386,100.0,0.0,8.0,1.34541,6.0,1.0,2.0,3.0,90.0,20.0,1.0,1.242553,0.849882,0.446319,1.684951,0.577285
4,4,4.553877,100.0,0.0,8.0,1.34567,3.0,0.5,1.0,1.0,90.0,24.0,1.0,0.75841,0.849882,0.446319,1.684951,0.7472


In [147]:
# DROP UNNECCESSARY COLUMNS
listings4 = listings4.drop(columns='Unnamed: 0')
listings4.head()

Unnamed: 0,price,host_response_rate,host_is_superhost,host_total_listings_count,latitude,accommodates,bathrooms_text,bedrooms,beds,minimum_nights,number_of_reviews,instant_bookable,ptype_code,rtype_code,ngtype_code,nbtype_code,avgcomp
0,4.406719,100.0,0.0,2.0,1.44255,1.0,1.0,1.0,1.0,180.0,1.0,0.0,-0.016218,0.849882,1.326759,2.181841,0.9615
1,4.382027,0.0,0.0,1.0,1.33235,2.0,1.0,1.0,1.0,90.0,18.0,0.0,-0.016218,0.849882,-0.434121,-1.296388,0.825117
2,4.219508,100.0,0.0,2.0,1.44246,1.0,1.0,1.0,1.0,6.0,20.0,0.0,-0.016218,0.849882,1.326759,2.181841,0.89919
3,5.187386,100.0,0.0,8.0,1.34541,6.0,1.0,2.0,3.0,90.0,20.0,1.0,1.242553,0.849882,0.446319,1.684951,0.577285
4,4.553877,100.0,0.0,8.0,1.34567,3.0,0.5,1.0,1.0,90.0,24.0,1.0,0.75841,0.849882,0.446319,1.684951,0.7472


In [148]:
X = listings4.drop('price',axis=1)
y = listings4['price']

In [149]:
# Prepare the train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state = 42)

In [150]:
# Create the parameter grid for GridSearchCV
dt_param_grid = {
    'max_features': ['auto', 'sqrt', 'log2'], #The number of features to consider when looking for the best split
    'min_samples_split': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15], #The minimum number of samples required to split an internal node
    'min_samples_leaf':[1,2,3,4,5,6,7,8,9,10,11], #The minimum number of samples required to be at a leaf node
    'max_depth':[80, 90, 100, 110, 120] #The maximum depth of the tree
}

In [151]:
# Instantiate a Random Forest Regressor
dt_reg = DecisionTreeRegressor(random_state = 42)

In [152]:
# Setup grid search
dt_grid = GridSearchCV(estimator = dt_reg, param_grid = dt_param_grid, cv=5,  n_jobs=-1, verbose=2)

In [153]:
# Fit the grid search model on training dataset
dt_grid.fit(X_train, y_train)

Fitting 5 folds for each of 2475 candidates, totalling 12375 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 258 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done 1468 tasks      | elapsed:   12.0s
[Parallel(n_jobs=-1)]: Done 3498 tasks      | elapsed:   26.6s
[Parallel(n_jobs=-1)]: Done 6328 tasks      | elapsed:   43.4s
[Parallel(n_jobs=-1)]: Done 9978 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 12368 out of 12375 | elapsed:  1.3min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 12375 out of 12375 | elapsed:  1.3min finished


GridSearchCV(cv=5, estimator=DecisionTreeRegressor(random_state=42), n_jobs=-1,
             param_grid={'max_depth': [80, 90, 100, 110, 120],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                              11],
                         'min_samples_split': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                               11, 12, 13, 14, 15]},
             verbose=2)

In [154]:
# Get best RF estimator based on best parameters, and use RF regressor to predict on test set
dt_best = dt_grid.best_estimator_
y_pred = dt_best.predict(X_test)

In [155]:
scores = cross_val_score(dt_best, X_train, y_train, cv=5)

In [156]:
# Calculate performance metrics
dt_dict4 = {'Model':'Decision Tree Regressor (d)',
          'R^2':metrics.r2_score(y_test, y_pred),
          'MAE':metrics.mean_absolute_error(y_test, y_pred),
          'MSE':metrics.mean_squared_error(y_test, y_pred),
          'RMSE':np.sqrt(metrics.mean_squared_error(y_test, y_pred)),
          'CVS':scores.mean()
          }


In [193]:
# Display model performance metrics
dt_reg_metrics4 = pd.DataFrame.from_dict(dt_dict4, orient = 'index').T
dt_reg_metrics4

Unnamed: 0,Model,R^2,MAE,MSE,RMSE,CVS
0,Decision Tree Regressor (d),0.621231,0.343923,0.260511,0.510403,0.622962
