# Classification and Regression Trees
Classification and Regression Trees (CART) are a set of supervised learning models used for problems involving classification and regression. In this chapter, you'll be introduced to the CART algorithm.

In [19]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score , roc_auc_score , confusion_matrix , classification_report
import pandas as pd
import numpy as np

In [2]:
# Load the breast cancer dataset that ships with scikit-learn.
breast_cancer = datasets.load_breast_cancer()

In [3]:
# Inspect which fields the loaded dataset object exposes.
breast_cancer.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [10]:
# Target labels come straight from the dataset object.
y = breast_cancer.target
# Wrap the feature matrix in a DataFrame so every column carries its feature name.
X = pd.DataFrame(
    data=breast_cancer.data,
    columns=breast_cancer.feature_names,
)

In [11]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1,
)

In [12]:
# Shallow classification tree: depth capped at 3, fixed seed for reproducible fitting.
dt = DecisionTreeClassifier(
    random_state=123,
    max_depth=3,
)

In [13]:
# Fit the depth-limited tree on the training split.
dt.fit(X_train,y_train)

DecisionTreeClassifier(max_depth=3, random_state=123)

In [17]:
# Predicted probability for class index 1 (second column of predict_proba).
y_pred_proba = dt.predict_proba(X_test)[:, 1]
# Hard class predictions for the held-out test split.
y_pred = dt.predict(X_test)

In [20]:
# Emit, one after another: accuracy, ROC AUC (scored from the class-1
# probabilities), the per-class precision/recall table, and the confusion matrix.
print(
    accuracy_score(y_test, y_pred),
    roc_auc_score(y_test, y_pred_proba),
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

0.9473684210526315
0.9833002645502645
              precision    recall  f1-score   support

           0       0.95      0.90      0.93        42
           1       0.95      0.97      0.96        72

    accuracy                           0.95       114
   macro avg       0.95      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

[[38  4]
 [ 2 70]]


In [29]:
# Second classifier: no depth cap, splits chosen with the entropy criterion.
# Swap in criterion="gini" to compare against Gini-impurity splits.
dt1 = DecisionTreeClassifier(
    criterion="entropy",
    random_state=123,
)

In [30]:
# Fit the entropy-criterion tree on the same training split.
dt1.fit(X_train,y_train)

DecisionTreeClassifier(criterion='entropy', random_state=123)

In [31]:
# Note: this rebinds y_pred / y_pred_proba, replacing the values produced by `dt`.
# Predicted probability for class index 1 (second column of predict_proba).
y_pred_proba = dt1.predict_proba(X_test)[:, 1]
# Hard class predictions from the entropy-criterion tree.
y_pred = dt1.predict(X_test)

In [32]:
# Same four-part report as for the first tree, now scored on dt1's predictions:
# accuracy, ROC AUC, classification report, confusion matrix.
print(
    accuracy_score(y_test, y_pred),
    roc_auc_score(y_test, y_pred_proba),
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

0.956140350877193
0.9553571428571429
              precision    recall  f1-score   support

           0       0.93      0.95      0.94        42
           1       0.97      0.96      0.97        72

    accuracy                           0.96       114
   macro avg       0.95      0.96      0.95       114
weighted avg       0.96      0.96      0.96       114

[[40  2]
 [ 3 69]]


## Train your first regression tree

In [12]:
from sklearn import datasets
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

In [3]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on older releases — confirm the pinned version, or plan a
# migration to another regression dataset (the outputs below would then change).
boston = datasets.load_boston()

In [4]:
# Features and target are taken directly from the dataset object (no DataFrame
# wrapper this time). This rebinds X and y from the classification section.
y = boston.target
X = boston.data

In [5]:
# 80/20 train/test split with a fixed seed; no stratification is requested here.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [17]:
# Regression tree, rebinding `dt` (previously the classifier). A float
# min_samples_leaf is interpreted by scikit-learn as a fraction of the training
# samples, so each leaf must hold at least 13% of them; depth is capped at 8.
dt = DecisionTreeRegressor(
    max_depth=8,
    min_samples_leaf=0.13,
    random_state=123,
)
# Kept as a separate trailing expression so the fitted estimator is displayed.
dt.fit(X_train, y_train)

DecisionTreeRegressor(max_depth=8, min_samples_leaf=0.13, random_state=123)

In [18]:
# Predict targets for the held-out test split with the fitted regression tree.
y_pred = dt.predict(X_test)

In [19]:
# Root-mean-squared error on the test split: square root of the MSE.
print(
    np.sqrt(
        mean_squared_error(y_true=y_test, y_pred=y_pred)
    )
)

6.100571987761473


In [20]:
# Bring the metric in under the short alias MSE used below.
from sklearn.metrics import mean_squared_error as MSE

# Test-set predictions from the fitted regression tree.
y_pred = dt.predict(X_test)

# Mean squared error first, then its square root to get RMSE in target units.
mse_dt = MSE(y_true=y_test, y_pred=y_pred)
rmse_dt = mse_dt ** 0.5

# Report the RMSE rounded to two decimals.
print("Test set RMSE of dt: {:.2f}".format(rmse_dt))

Test set RMSE of dt: 6.10
