# Decision Tree vs. Random Forest

## Runtime Test

## 1) Intro and Setup

In [1]:
# Standard library
import time

# Third-party: data handling
import numpy as np
import pandas as pd

# Third-party: models, metrics and model-selection helpers
from sklearn import ensemble
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score

# `IPython.display` is the public home of these helpers; importing them from
# `IPython.core.display` is deprecated and fails on recent IPython releases.
from IPython.display import display, HTML

# Inject a CSS rule forcing `.container` elements to full width
# (presumably the notebook page wrapper -- TODO confirm it still applies in the
# front end being used).
display(HTML("<style>.container { width:100% !important; }</style>"))

# Let pandas print up to 100 rows before truncating a frame's output.
pd.set_option('display.max_rows', 100)

In [2]:
# Bring in the dataset.
# The path is kept in one named constant so it can be repointed without
# hunting through the cell; it is resolved relative to the working directory.
DATA_PATH = 'Titanic Train Clean.csv'
df = pd.read_csv(DATA_PATH)

# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks like
# a row index written out when the CSV was saved -- confirm, and if so consider
# reading with `index_col=0` so it is not treated as data downstream.
df.head()

Unnamed: 0,Survived,Age,Female,SibSp,Parch,Class,Fare,EmbC,EmbQ,EmbS
0,1,1,0,0,1,3,8.52,1,0,0
1,1,1,0,1,1,2,14.5,0,0,1
2,1,1,1,2,1,3,19.26,1,0,0
3,1,1,1,2,1,3,19.26,1,0,0
4,1,1,0,0,2,2,29.0,0,0,1


In [4]:
# Feature matrix: every column except the target.
# `columns=` replaces the positional `axis` argument (`df.drop('Survived', 1)`),
# which pandas deprecated in 1.3 and rejects from 2.0 onward.
# NOTE(review): this keeps all remaining columns, including 'Unnamed: 0' seen in
# the data preview -- presumably a saved row index; verify it should be a feature.
X = df.drop(columns='Survived')

# Target vector: the survival label.
Y = df['Survived']

In [5]:
# Hold out 30% of the rows for testing. The split is seeded so the same rows
# land in train/test on every run; without a seed the metrics and timings
# reported below change each time the notebook is executed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42
)

## 2) Decision Tree

In [30]:
# Seed the tree so repeated runs build the same model; scikit-learn permutes
# features at each split, so equally good splits can otherwise be chosen
# differently from run to run.
dtree = DecisionTreeClassifier(random_state=42)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is wall-clock and can be too coarse (or jump)
# for a fit that takes only milliseconds.
start_time = time.perf_counter()

dtree.fit(X_train, Y_train)

runtime = time.perf_counter() - start_time
print('--- DECISION TREE TRAIN ---')
print('Runtime: ', round(runtime, 6), 'seconds')

--- DECISION TREE TRAIN ---
Runtime:  0.003435 seconds


In [34]:
# Time only the prediction call on the held-out rows. perf_counter() is used
# because it is monotonic and high-resolution, which matters for an interval
# this short; time.time() is wall-clock and may be too coarse.
start_time = time.perf_counter()
predictions = dtree.predict(X_test)
runtime = time.perf_counter() - start_time

print('--- DECISION TREE TEST ---')
print('Runtime: ', round(runtime, 6), 'seconds')
print()

# Rows of the matrix are true labels, columns are predicted labels.
print('CONFUSION MATRIX')
print(confusion_matrix(Y_test, predictions))
print()

# Per-class precision / recall / F1 on the test split.
print('CLASSIFICATION REPORT')
print(classification_report(Y_test, predictions))

--- DECISION TREE TEST ---
Runtime:  0.001774 seconds

CONFUSION MATRIX
[[112  25]
 [ 27  51]]

CLASSIFICATION REPORT
              precision    recall  f1-score   support

           0       0.81      0.82      0.81       137
           1       0.67      0.65      0.66        78

   micro avg       0.76      0.76      0.76       215
   macro avg       0.74      0.74      0.74       215
weighted avg       0.76      0.76      0.76       215



In [44]:
# Time a 10-fold cross-validation of the decision tree. Note this runs on the
# full X / Y, not on the train split used in the cells above.
# perf_counter() is the monotonic, high-resolution clock meant for timing.
start_time = time.perf_counter()
crossval = cross_val_score(dtree, X, Y, cv=10)
runtime = time.perf_counter() - start_time

print()
print('--- DECISION TREE CROSSVALIDATION ---')
print('Runtime: ', round(runtime, 6), 'seconds')
print()

# Per-fold scores, in fold order.
print(crossval)

# cross_val_score returns a NumPy array, so take its mean directly rather than
# accumulating the folds in a Python loop.
cvscore = crossval.mean()
print()
print("Average Score: ", (cvscore * 100).round(3),'%')


--- DECISION TREE CROSSVALIDATION ---
Runtime:  0.070232 seconds

[0.45833333 0.51388889 0.79166667 0.79166667 0.63380282 0.64788732
 0.74647887 0.67605634 0.8028169  0.49295775]

Average Score:  65.556 %


## 3) Random Forest

In [45]:
# 100-tree forest. Seeded so the bootstrap samples and per-split feature
# subsets -- and therefore the fitted model and its scores -- are the same on
# every run.
rfc = RandomForestClassifier(n_estimators=100, random_state=42)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock and can jump or be too coarse.
start_time = time.perf_counter()
rfc.fit(X_train, Y_train)
runtime = time.perf_counter() - start_time

print('--- RANDOM FOREST TRAIN ---')
print('Runtime: ', round(runtime, 6), 'seconds')

--- RANDOM FOREST TRAIN ---
Runtime:  0.220841 seconds


In [46]:
# Time only the forest's prediction call on the held-out rows, using the
# monotonic high-resolution perf_counter() clock rather than wall-clock
# time.time(), which can be too coarse for an interval this short.
start_time = time.perf_counter()
rfc_pred = rfc.predict(X_test)
runtime = time.perf_counter() - start_time

print('--- RANDOM FOREST TEST ---')
print('Runtime: ', round(runtime, 6), 'seconds')
print()

# Rows of the matrix are true labels, columns are predicted labels.
print('CONFUSION MATRIX')
print(confusion_matrix(Y_test, rfc_pred))
print()

# Per-class precision / recall / F1 on the test split.
print('CLASSIFICATION REPORT')
print(classification_report(Y_test, rfc_pred))

--- RANDOM FOREST TEST ---
Runtime:  0.019956 seconds

CONFUSION MATRIX
[[116  21]
 [ 23  55]]

CLASSIFICATION REPORT
              precision    recall  f1-score   support

           0       0.83      0.85      0.84       137
           1       0.72      0.71      0.71        78

   micro avg       0.80      0.80      0.80       215
   macro avg       0.78      0.78      0.78       215
weighted avg       0.79      0.80      0.79       215



In [47]:
# Time a 10-fold cross-validation of the random forest. Note this runs on the
# full X / Y, not on the train split used in the cells above.
# perf_counter() is the monotonic, high-resolution clock meant for timing.
start_time = time.perf_counter()
crossval = cross_val_score(rfc, X, Y, cv=10)
runtime = time.perf_counter() - start_time

print()
print('--- RANDOM FOREST CROSSVALIDATION ---')
print('Runtime: ', round(runtime, 6), 'seconds')
print()

# Per-fold scores, in fold order.
print(crossval)

# cross_val_score returns a NumPy array, so take its mean directly rather than
# accumulating the folds in a Python loop.
cvscore = crossval.mean()
print()
print("Average Score: ", (cvscore * 100).round(3),'%')


--- RANDOM FOREST CROSSVALIDATION ---
Runtime:  1.833654 seconds

[0.47222222 0.61111111 0.81944444 0.83333333 0.67605634 0.8028169
 0.83098592 0.77464789 0.71830986 0.8028169 ]

Average Score:  73.417 %


## 4) Summary