In [1]:
import numpy as np
import pickle

In [13]:
from sklearn.datasets import load_iris
from sklearn import tree
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import confusion_matrix

## Load the features

In [3]:
# Read the cached (X, y) pair back from disk: X is the feature matrix and
# y presumably holds the binary class labels (TODO confirm against the
# notebook that wrote the cache).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load caches that were produced locally by a trusted step.
with open('../cache/features', 'rb') as cache_file:
    X, y = pickle.load(cache_file)

# Show the array dimensions as the cell output.
X.shape

(50, 3)

In [4]:
# Peek at the first five feature rows rather than dumping the whole array.
X[:5]

array([[ 0.62509819,  1.24442821,  0.61933002],
       [-0.67339109,  1.45417919,  2.12757029],
       [-0.5878073 ,  0.52357917,  1.11138647],
       [-2.49887594,  2.66966308,  5.16853902],
       [-0.03944933, -0.25984043, -0.2203911 ]])

In [5]:
# Mean of the labels; for 0/1 labels this is the share of the positive class.
np.mean(y)

0.5

## Evaluate a Decision Tree Model via Cross-Validation

In [23]:
# Fit a decision tree on the full data set.
# random_state is pinned because scikit-learn randomly permutes the candidate
# features at each split, so an unseeded tree can differ between runs even
# with the default "best" splitter. Fixing the seed makes the fitted tree and
# the cross-validation results derived from this estimator reproducible.
# NOTE(review): outputs recorded from an earlier unseeded run may shift
# slightly once the notebook is re-executed.
clf = tree.DecisionTreeClassifier(random_state=0).fit(X, y)

In [24]:
# 10-fold cross-validated scores (the classifier's default scorer, accuracy).
scores = cross_val_score(clf, X, y, cv=10)

# Report the mean score together with a two-standard-deviation spread.
mean_accuracy = scores.mean()
accuracy_spread = 2 * scores.std()
print("Accuracy: %0.2f (+/- %0.2f)" % (mean_accuracy, accuracy_spread))

Accuracy: 0.54 (+/- 0.48)


In [25]:
# Out-of-fold predictions: each sample is predicted by a model that did not
# see it during training.
y_pred = cross_val_predict(estimator=clf, X=X, y=y, cv=10)

# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_true=y, y_pred=y_pred)

array([[15, 10],
       [13, 12]])

In [30]:
import graphviz

# Export the fitted tree as DOT source; out_file=None makes export_graphviz
# return the text instead of writing it to a file.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    filled=True,
    rounded=True,
    special_characters=True,
)

# Wrap the DOT text and render it to disk; render() returns the output path.
# NOTE(review): "decicition_tree" looks like a misspelling of "decision_tree";
# it is kept as-is because it determines the name of the generated file.
graph = graphviz.Source(dot_data)
graph.render("decicition_tree")

'decicition_tree.pdf'

## Look at bad examples

In [17]:
from PIL import Image
import imageio

def printWaterfall(file):
    """Read the image stored at `file` and show it inline.

    The file is decoded with imageio into an array, wrapped in a PIL image and
    handed to `display`. `display` is not imported here; it is expected to be
    the helper that IPython/Jupyter injects into the notebook namespace.
    """
    pixels = imageio.imread(file)
    waterfall_image = Image.fromarray(pixels)
    display(waterfall_image)

In [18]:
# False negatives: the true label is 1 but the cross-validated prediction is 0.
is_false_negative = (y == 1) & (y_pred == 0)

# Print the feature vector of every false negative, one per line.
for false_negative_row in X[is_false_negative]:
    print(false_negative_row)

[-2.49887594  2.66966308  5.16853902]
[-0.03944933 -0.25984043 -0.2203911 ]
[2.1574742  2.65519119 0.49771699]
[1.77362515 2.43181702 0.65819187]
[ 0.76744592  0.16562809 -0.60181783]
[3.50395173 3.67764738 0.17369565]
[ 2.26107769  1.82145815 -0.43961955]
[-0.23652635 -0.07155775  0.1649686 ]
[-1.6641765   0.31323888  1.97741538]
[ 5.54330004 10.45816298  4.91486293]
[-0.21483913  0.21619935  0.43103849]
[1.01193613 2.21828751 1.20635139]


In [19]:
# False positives: the true label is 0 but the cross-validated prediction is 1.
is_false_positive = (y == 0) & (y_pred == 1)

# Print the feature vector of every false positive, one per line.
for false_positive_row in X[is_false_positive]:
    print(false_positive_row)

[2.98122529 3.35490679 0.3736815 ]
[-0.36643211  0.26361184  0.63004394]
[-0.86330919  0.41841343  1.28172262]
[24.29879826 24.12743707 -0.1713612 ]
[5.60013484 5.75458414 0.1544493 ]
[ 1.03784514 -0.34498352 -1.38282866]
[1.601177   2.26345954 0.66228254]
[-0.10467485  0.08699378  0.19166863]
[0.10617613 0.36678522 0.26060909]
[2.02581137 3.75073865 1.72492728]
