In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn import ensemble
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error

In [2]:
df = pd.read_csv((
    "https://raw.githubusercontent.com/Thinkful-Ed/data-201-resources/"
    "master/ESS_practice_data/ESSdata_Thinkful.csv")).dropna()

# Definine outcome and predictors.
# Set our outcome to 0 and 1.
y = df['partner'] - 1
X = df.loc[:, ~df.columns.isin(['partner', 'cntry', 'idno'])]

# Make the categorical variable 'country' into dummies.
X = pd.concat([X, pd.get_dummies(df['cntry'])], axis=1)

# Create training and test sets.
offset = int(X.shape[0] * 0.9)

# Put 90% of the data in the training set.
X_train, y_train = X[:offset], y[:offset]

# And put 10% in the test set.
X_test, y_test = X[offset:], y[offset:]

## Change Subsample

In [4]:
# Initialize and fit the model.
clf = ensemble.GradientBoostingClassifier(subsample = 0.5, n_estimators = 500)
clf.fit(X_train, y_train)

predict_train = clf.predict(X_train)
predict_test = clf.predict(X_test)

# Accuracy tables.
table_train = pd.crosstab(y_train, predict_train, margins=True)
table_test = pd.crosstab(y_test, predict_test, margins=True)

train_tI_errors = table_train.loc[0.0,1.0] / table_train.loc['All','All']
train_tII_errors = table_train.loc[1.0,0.0] / table_train.loc['All','All']

test_tI_errors = table_test.loc[0.0,1.0]/table_test.loc['All','All']
test_tII_errors = table_test.loc[1.0,0.0]/table_test.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors, train_tII_errors, test_tI_errors, test_tII_errors))

Training set accuracy:
Percent Type I errors: 0.03627932351336607
Percent Type II errors: 0.14729950900163666

Test set accuracy:
Percent Type I errors: 0.07730061349693251
Percent Type II errors: 0.18773006134969325


In [5]:
# Initialize and fit the model.
clf1 = ensemble.GradientBoostingClassifier(subsample = 0.4, n_estimators = 500)
clf1.fit(X_train, y_train)

predict_train1 = clf1.predict(X_train)
predict_test1 = clf1.predict(X_test)

# Accuracy tables.
table_train1 = pd.crosstab(y_train, predict_train1, margins=True)
table_test1 = pd.crosstab(y_test, predict_test1, margins=True)

train_tI_errors1 = table_train1.loc[0.0,1.0] / table_train1.loc['All','All']
train_tII_errors1 = table_train1.loc[1.0,0.0] / table_train1.loc['All','All']

test_tI_errors1 = table_test1.loc[0.0,1.0]/table_test1.loc['All','All']
test_tII_errors1 = table_test1.loc[1.0,0.0]/table_test1.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors1, train_tII_errors1, test_tI_errors1, test_tII_errors1))

Training set accuracy:
Percent Type I errors: 0.03859792689579924
Percent Type II errors: 0.14961811238406983

Test set accuracy:
Percent Type I errors: 0.08834355828220859
Percent Type II errors: 0.18036809815950922


In [6]:
# Initialize and fit the model.
clf2 = ensemble.GradientBoostingClassifier(subsample = 0.1, n_estimators = 500)
clf2.fit(X_train, y_train)

predict_train2 = clf2.predict(X_train)
predict_test2 = clf2.predict(X_test)

# Accuracy tables.
table_train2 = pd.crosstab(y_train, predict_train2, margins=True)
table_test2 = pd.crosstab(y_test, predict_test2, margins=True)

train_tI_errors2 = table_train2.loc[0.0,1.0] / table_train2.loc['All','All']
train_tII_errors2 = table_train2.loc[1.0,0.0] / table_train2.loc['All','All']

test_tI_errors2 = table_test2.loc[0.0,1.0]/table_test2.loc['All','All']
test_tII_errors2 = table_test2.loc[1.0,0.0]/table_test2.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors2, train_tII_errors2, test_tI_errors2, test_tII_errors2))

Training set accuracy:
Percent Type I errors: 0.05837424986361157
Percent Type II errors: 0.15780141843971632

Test set accuracy:
Percent Type I errors: 0.10920245398773006
Percent Type II errors: 0.17668711656441718


In [7]:
# Initialize and fit the model.
clf3 = ensemble.GradientBoostingClassifier(subsample = 0.05, n_estimators = 500)
clf3.fit(X_train, y_train)

predict_train3 = clf3.predict(X_train)
predict_test3 = clf3.predict(X_test)

# Accuracy tables.
table_train3 = pd.crosstab(y_train, predict_train3, margins=True)
table_test3 = pd.crosstab(y_test, predict_test3, margins=True)

train_tI_errors3 = table_train3.loc[0.0,1.0] / table_train3.loc['All','All']
train_tII_errors3 = table_train3.loc[1.0,0.0] / table_train3.loc['All','All']

test_tI_errors3 = table_test3.loc[0.0,1.0]/table_test3.loc['All','All']
test_tII_errors3 = table_test3.loc[1.0,0.0]/table_test3.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors3, train_tII_errors3, test_tI_errors3, test_tII_errors3))

Training set accuracy:
Percent Type I errors: 0.11865793780687398
Percent Type II errors: 0.17866884888161483

Test set accuracy:
Percent Type I errors: 0.1325153374233129
Percent Type II errors: 0.1656441717791411


In [8]:
# Initialize and fit the model.
clf4 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 500)
clf4.fit(X_train, y_train)

predict_train4 = clf4.predict(X_train)
predict_test4 = clf4.predict(X_test)

# Accuracy tables.
table_train4 = pd.crosstab(y_train, predict_train4, margins=True)
table_test4 = pd.crosstab(y_test, predict_test4, margins=True)

train_tI_errors4 = table_train4.loc[0.0,1.0] / table_train4.loc['All','All']
train_tII_errors4 = table_train4.loc[1.0,0.0] / table_train4.loc['All','All']

test_tI_errors4 = table_test4.loc[0.0,1.0]/table_test4.loc['All','All']
test_tII_errors4 = table_test4.loc[1.0,0.0]/table_test4.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors4, train_tII_errors4, test_tI_errors4, test_tII_errors4))

Training set accuracy:
Percent Type I errors: 0.5531914893617021
Percent Type II errors: 0.1336606655755592

Test set accuracy:
Percent Type I errors: 0.5435582822085889
Percent Type II errors: 0.1558282208588957


In [9]:
# Initialize and fit the model.
clf5 = ensemble.GradientBoostingClassifier(subsample = 0.001, n_estimators = 500)
clf5.fit(X_train, y_train)

predict_train5 = clf5.predict(X_train)
predict_test5 = clf5.predict(X_test)

# Accuracy tables.
table_train5 = pd.crosstab(y_train, predict_train5, margins=True)
table_test5 = pd.crosstab(y_test, predict_test5, margins=True)

train_tI_errors5 = table_train5.loc[0.0,1.0] / table_train5.loc['All','All']
train_tII_errors5 = table_train5.loc[1.0,0.0] / table_train5.loc['All','All']

test_tI_errors5 = table_test5.loc[0.0,1.0]/table_test5.loc['All','All']
test_tII_errors5 = table_test5.loc[1.0,0.0]/table_test5.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors5, train_tII_errors5, test_tI_errors5, test_tII_errors5))

Training set accuracy:
Percent Type I errors: 0.008319694489907256
Percent Type II errors: 0.3760229132569558

Test set accuracy:
Percent Type I errors: 0.006134969325153374
Percent Type II errors: 0.3717791411042945


In [10]:
# Initialize and fit the model.
clf6 = ensemble.GradientBoostingClassifier(subsample = 0.0005, n_estimators = 500)
clf6.fit(X_train, y_train)

predict_train6 = clf6.predict(X_train)
predict_test6 = clf6.predict(X_test)

# Accuracy tables.
table_train6 = pd.crosstab(y_train, predict_train6, margins=True)
table_test6 = pd.crosstab(y_test, predict_test6, margins=True)

train_tI_errors6 = table_train6.loc[0.0,1.0] / table_train6.loc['All','All']
train_tII_errors6 = table_train6.loc[1.0,0.0] / table_train6.loc['All','All']

test_tI_errors6 = table_test6.loc[0.0,1.0]/table_test6.loc['All','All']
test_tII_errors6 = table_test6.loc[1.0,0.0]/table_test6.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors6, train_tII_errors6, test_tI_errors6, test_tII_errors6))

Training set accuracy:
Percent Type I errors: 0.5144571740316422
Percent Type II errors: 0.06451172940534643

Test set accuracy:
Percent Type I errors: 0.48588957055214727
Percent Type II errors: 0.04171779141104295


### For both the training and test set and Type I and II errors, 0.01 subsample parameter performed the best. We will use that going forward.

## Different Loss Function

In [11]:
# Initialize and fit the model.
clf7 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 500, loss = 'exponential')
clf7.fit(X_train, y_train)

predict_train7 = clf7.predict(X_train)
predict_test7 = clf7.predict(X_test)

# Accuracy tables.
table_train7 = pd.crosstab(y_train, predict_train7, margins=True)
table_test7 = pd.crosstab(y_test, predict_test7, margins=True)

train_tI_errors7 = table_train7.loc[0.0,1.0] / table_train7.loc['All','All']
train_tII_errors7 = table_train7.loc[1.0,0.0] / table_train7.loc['All','All']

test_tI_errors7 = table_test7.loc[0.0,1.0]/table_test7.loc['All','All']
test_tII_errors7 = table_test7.loc[1.0,0.0]/table_test7.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors7, train_tII_errors7, test_tI_errors7, test_tII_errors7))

Training set accuracy:
Percent Type I errors: 0.0947899618112384
Percent Type II errors: 0.16789416257501363

Test set accuracy:
Percent Type I errors: 0.08711656441717791
Percent Type II errors: 0.19386503067484662


### Exponetial Loss Function performs worse than the default, deviance. We will use the default.

## More Iterations

In [3]:
# Initialize and fit the model.
clf8 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 600)
clf8.fit(X_train, y_train)

predict_train8 = clf8.predict(X_train)
predict_test8 = clf8.predict(X_test)

# Accuracy tables.
table_train8 = pd.crosstab(y_train, predict_train8, margins=True)
table_test8 = pd.crosstab(y_test, predict_test8, margins=True)

train_tI_errors8 = table_train8.loc[0.0,1.0] / table_train8.loc['All','All']
train_tII_errors8 = table_train8.loc[1.0,0.0] / table_train8.loc['All','All']

test_tI_errors8 = table_test8.loc[0.0,1.0]/table_test8.loc['All','All']
test_tII_errors8 = table_test8.loc[1.0,0.0]/table_test8.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors8, train_tII_errors8, test_tI_errors8, test_tII_errors8))

Training set accuracy:
Percent Type I errors: 0.33974358974358976
Percent Type II errors: 0.1793507910529187

Test set accuracy:
Percent Type I errors: 0.21104294478527608
Percent Type II errors: 0.3042944785276074


In [4]:
# Initialize and fit the model.
clf9 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 700)
clf9.fit(X_train, y_train)

predict_train9 = clf9.predict(X_train)
predict_test9 = clf9.predict(X_test)

# Accuracy tables.
table_train9 = pd.crosstab(y_train, predict_train9, margins=True)
table_test9 = pd.crosstab(y_test, predict_test9, margins=True)

train_tI_errors9 = table_train9.loc[0.0,1.0] / table_train9.loc['All','All']
train_tII_errors9 = table_train9.loc[1.0,0.0] / table_train9.loc['All','All']

test_tI_errors9 = table_test8.loc[0.0,1.0]/table_test9.loc['All','All']
test_tII_errors9 = table_test8.loc[1.0,0.0]/table_test9.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors9, train_tII_errors9, test_tI_errors9, test_tII_errors9))

Training set accuracy:
Percent Type I errors: 0.3527004909983633
Percent Type II errors: 0.11974904528096017

Test set accuracy:
Percent Type I errors: 0.21104294478527608
Percent Type II errors: 0.3042944785276074


In [5]:
# Initialize and fit the model.
clf10 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 800)
clf10.fit(X_train, y_train)

predict_train10 = clf10.predict(X_train)
predict_test10 = clf10.predict(X_test)

# Accuracy tables.
table_train10 = pd.crosstab(y_train, predict_train10, margins=True)
table_test10 = pd.crosstab(y_test, predict_test10, margins=True)

train_tI_errors10 = table_train10.loc[0.0,1.0] / table_train10.loc['All','All']
train_tII_errors10 = table_train10.loc[1.0,0.0] / table_train10.loc['All','All']

test_tI_errors10 = table_test10.loc[0.0,1.0]/table_test10.loc['All','All']
test_tII_errors10 = table_test10.loc[1.0,0.0]/table_test10.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors10, train_tII_errors10, test_tI_errors10, test_tII_errors10))

Training set accuracy:
Percent Type I errors: 0.06287506819421713
Percent Type II errors: 0.34997272231314785

Test set accuracy:
Percent Type I errors: 0.07239263803680981
Percent Type II errors: 0.33865030674846625


### Each increase in iterations results in poorer results.

## Less Iterations

In [6]:
# Initialize and fit the model.
clf11 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 400)
clf11.fit(X_train, y_train)

predict_train11 = clf11.predict(X_train)
predict_test11 = clf11.predict(X_test)

# Accuracy tables.
table_train11 = pd.crosstab(y_train, predict_train11, margins=True)
table_test11 = pd.crosstab(y_test, predict_test11, margins=True)

train_tI_errors11 = table_train11.loc[0.0,1.0] / table_train11.loc['All','All']
train_tII_errors11 = table_train11.loc[1.0,0.0] / table_train11.loc['All','All']

test_tI_errors11 = table_test11.loc[0.0,1.0]/table_test11.loc['All','All']
test_tII_errors11 = table_test11.loc[1.0,0.0]/table_test11.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors11, train_tII_errors11, test_tI_errors11, test_tII_errors11))

Training set accuracy:
Percent Type I errors: 0.21249318057828695
Percent Type II errors: 0.1579378068739771

Test set accuracy:
Percent Type I errors: 0.5263803680981595
Percent Type II errors: 0.0392638036809816


In [7]:
# Initialize and fit the model.
clf12 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 300)
clf12.fit(X_train, y_train)

predict_train12 = clf12.predict(X_train)
predict_test12 = clf12.predict(X_test)

# Accuracy tables.
table_train12 = pd.crosstab(y_train, predict_train12, margins=True)
table_test12 = pd.crosstab(y_test, predict_test12, margins=True)

train_tI_errors12 = table_train12.loc[0.0,1.0] / table_train12.loc['All','All']
train_tII_errors12 = table_train12.loc[1.0,0.0] / table_train12.loc['All','All']

test_tI_errors12 = table_test12.loc[0.0,1.0]/table_test12.loc['All','All']
test_tII_errors12 = table_test12.loc[1.0,0.0]/table_test12.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors12, train_tII_errors12, test_tI_errors12, test_tII_errors12))

Training set accuracy:
Percent Type I errors: 0.36020185488270595
Percent Type II errors: 0.1539825422804146

Test set accuracy:
Percent Type I errors: 0.3006134969325153
Percent Type II errors: 0.16319018404907976


### Each decrease in iterations also results in poorer results. It seems that for this dataset, 500 iterations is ideal.

## Change Max Depth

In [8]:
# Initialize and fit the model.
clf13 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 500, max_depth = 4)
clf13.fit(X_train, y_train)

predict_train13 = clf13.predict(X_train)
predict_test13 = clf13.predict(X_test)

# Accuracy tables.
table_train13 = pd.crosstab(y_train, predict_train13, margins=True)
table_test13 = pd.crosstab(y_test, predict_test13, margins=True)

train_tI_errors13 = table_train13.loc[0.0,1.0] / table_train13.loc['All','All']
train_tII_errors13 = table_train13.loc[1.0,0.0] / table_train13.loc['All','All']

test_tI_errors13 = table_test13.loc[0.0,1.0]/table_test13.loc['All','All']
test_tII_errors13 = table_test13.loc[1.0,0.0]/table_test13.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors13, train_tII_errors13, test_tI_errors13, test_tII_errors13))

Training set accuracy:
Percent Type I errors: 0.16284779050736498
Percent Type II errors: 0.24045280960174578

Test set accuracy:
Percent Type I errors: 0.29325153374233126
Percent Type II errors: 0.1558282208588957


In [10]:
# Initialize and fit the model.
clf14 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 500, max_depth = 5)
clf14.fit(X_train, y_train)

predict_train14 = clf14.predict(X_train)
predict_test14 = clf14.predict(X_test)

# Accuracy tables.
table_train14 = pd.crosstab(y_train, predict_train14, margins=True)
table_test14 = pd.crosstab(y_test, predict_test14, margins=True)

train_tI_errors14 = table_train14.loc[0.0,1.0] / table_train14.loc['All','All']
train_tII_errors14 = table_train14.loc[1.0,0.0] / table_train14.loc['All','All']

test_tI_errors14 = table_test14.loc[0.0,1.0]/table_test14.loc['All','All']
test_tII_errors14 = table_test14.loc[1.0,0.0]/table_test14.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors14, train_tII_errors14, test_tI_errors14, test_tII_errors14))

Training set accuracy:
Percent Type I errors: 0.26909438079650844
Percent Type II errors: 0.2193126022913257

Test set accuracy:
Percent Type I errors: 0.2822085889570552
Percent Type II errors: 0.20122699386503068


In [11]:
# Initialize and fit the model.
clf15 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 500, max_depth = 2)
clf15.fit(X_train, y_train)

predict_train15 = clf15.predict(X_train)
predict_test15 = clf15.predict(X_test)

# Accuracy tables.
table_train15 = pd.crosstab(y_train, predict_train15, margins=True)
table_test15 = pd.crosstab(y_test, predict_test15, margins=True)

train_tI_errors15 = table_train15.loc[0.0,1.0] / table_train15.loc['All','All']
train_tII_errors15 = table_train15.loc[1.0,0.0] / table_train15.loc['All','All']

test_tI_errors15 = table_test15.loc[0.0,1.0]/table_test15.loc['All','All']
test_tII_errors15 = table_test15.loc[1.0,0.0]/table_test15.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors15, train_tII_errors15, test_tI_errors15, test_tII_errors15))

Training set accuracy:
Percent Type I errors: 0.3873431533006001
Percent Type II errors: 0.10092744135297327

Test set accuracy:
Percent Type I errors: 0.5251533742331288
Percent Type II errors: 0.05644171779141104


In [12]:
# Initialize and fit the model.
clf16 = ensemble.GradientBoostingClassifier(subsample = 0.01, n_estimators = 500, max_depth = 1)
clf16.fit(X_train, y_train)

predict_train16 = clf16.predict(X_train)
predict_test16 = clf16.predict(X_test)

# Accuracy tables.
table_train16 = pd.crosstab(y_train, predict_train16, margins=True)
table_test16 = pd.crosstab(y_test, predict_test16, margins=True)

train_tI_errors16 = table_train16.loc[0.0,1.0] / table_train16.loc['All','All']
train_tII_errors16 = table_train16.loc[1.0,0.0] / table_train16.loc['All','All']

test_tI_errors16 = table_test16.loc[0.0,1.0]/table_test16.loc['All','All']
test_tII_errors16 = table_test16.loc[1.0,0.0]/table_test16.loc['All','All']

print((
    'Training set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}\n\n'
    'Test set accuracy:\n'
    'Percent Type I errors: {}\n'
    'Percent Type II errors: {}'
).format(train_tI_errors16, train_tII_errors16, test_tI_errors16, test_tII_errors16))

Training set accuracy:
Percent Type I errors: 0.0851063829787234
Percent Type II errors: 0.17471358428805236

Test set accuracy:
Percent Type I errors: 0.0736196319018405
Percent Type II errors: 0.19386503067484662


### The default value of 2 for the max_depth parameter is still the most ideal for Type I and II errors. There is some increase in the accuracy of Type II errors as the max_depth increases but the Type I error accuracy drops significantly.