In [1]:
from bokeh.plotting import output_notebook, show
from ensemble_utils import scatterplot_2class
from ensemble_utils import draw_activate_image
from soydata.data.classification import make_moons

import numpy as np
# Print NumPy arrays with 5 decimal digits and without scientific notation.
np.set_printoptions(precision=5, suppress=True)

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter; it also hides deprecation warnings from the libraries used below.
warnings.filterwarnings('ignore')

# Route bokeh `show(...)` output to the notebook.
output_notebook()

# Two-class toy dataset of 500 samples; seed=0 is passed so the same points are generated on every run.
# NOTE(review): the meaning of xy_ratio / x_gap / y_gap / noise is defined by soydata's make_moons — confirm there.
X, labels = make_moons(n_samples=500, xy_ratio=2.0, x_gap=-0.2, y_gap=-0.15, noise=0.1, seed=0)
# 400x400 scatter plot of the samples and their labels (helper from ensemble_utils).
p = scatterplot_2class(X, labels, height=400, width=400)
show(p)

In [2]:
from sklearn.tree import DecisionTreeClassifier

def prepare_elements(model, X, labels):
    """Compute a signed per-sample score and the accuracy of a fitted binary classifier.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``predict_proba(X)`` (at least two columns)
        and ``predict(X)``.
    X : array-like
        Samples; passed unchanged to ``model``.
    labels : array-like
        True labels, one per sample. Lists, 1-D arrays and column vectors are
        all accepted; they are flattened before being compared with the
        predictions.

    Returns
    -------
    score : numpy.ndarray
        ``proba[:, 1] - proba[:, 0]`` for every sample, i.e. positive where
        the second probability column is larger than the first.
    accuracy : numpy.float64
        Fraction of samples whose prediction equals its label.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of labels.
    """
    proba = np.asarray(model.predict_proba(X))
    score = proba[:, 1] - proba[:, 0]

    # Flatten both sides so that a (n, 1) label column cannot broadcast
    # against (n,) predictions into an (n, n) comparison matrix.
    pred = np.asarray(model.predict(X)).ravel()
    truth = np.asarray(labels).ravel()
    if pred.shape != truth.shape:
        raise ValueError(
            f'got {pred.shape[0]} predictions for {truth.shape[0]} labels'
        )

    accuracy = np.mean(pred == truth)
    return score, accuracy

In [3]:
# Fit a single decision tree with default (unbounded) depth on the full data set.
# random_state is pinned: scikit-learn permutes the candidate features at every
# split, so equally good splits could otherwise be resolved differently on each
# run and the figure would not be reproducible after a kernel restart.
dt = DecisionTreeClassifier(random_state=0)
dt.fit(X, labels)

# Training-set score (second minus first class probability) and accuracy.
score, accuracy = prepare_elements(dt, X, labels)
title = f'Decision Tree. accuracy={accuracy:.4}'

# Draw the model's activation image first, then overlay the labelled samples
# on the same figure by passing it back in through `p=`.
p = draw_activate_image(dt, X, use_score=True, resolution=100, title=title, height=400, width=400)
p = scatterplot_2class(X, labels, score=score, p=p)
show(p)

In [6]:
from sklearn.ensemble import RandomForestClassifier
from bokeh.layouts import gridplot
from bokeh.io import save

def _decision_figure(model, X, labels, name, resolution=100, size=400):
    """Build one figure: the activation image of ``model`` with the samples on top.

    Parameters
    ----------
    model : fitted classifier with ``predict_proba`` and ``predict``
    X, labels : the samples and their true labels
    name : str
        Model description used as the first part of the figure title.
    resolution : int
        Passed to ``draw_activate_image`` as ``resolution``.
    size : int
        Height and width passed to ``draw_activate_image``.

    Returns
    -------
    Whatever ``scatterplot_2class`` returns for the combined figure.
    """
    score, accuracy = prepare_elements(model, X, labels)
    title = f'{name}. accuracy={accuracy:.4}'
    fig = draw_activate_image(model, X, use_score=True, resolution=resolution,
                              height=size, width=size, title=title)
    return scatterplot_2class(X, labels, score=score, p=fig)


# Compare a depth-limited forest with an unconstrained one. After the loop `rf`
# is the last forest fitted (max_depth=None).
for depth in [4, None]:
    rf = RandomForestClassifier(
        n_estimators=100,
        max_depth=depth,
        oob_score=True,
        # Pinned so bootstrap samples, feature sub-sampling and therefore
        # oob_score_ / feature_importances_ are the same on every re-run.
        random_state=0,
    )
    rf.fit(X, labels)

    # First panel: the whole forest. Remaining panels: its first five trees.
    figures = [_decision_figure(rf, X, labels, f'Random Forest (max depth={depth})')]
    # The loop variable is `tree`, not `dt`, so the stand-alone decision tree
    # fitted in the earlier cell is not overwritten.
    # NOTE(review): sub-estimators of a scikit-learn forest are trained on
    # encoded class indices, so their accuracy here is only meaningful if
    # `labels` is already coded as 0/1 — confirm against make_moons.
    figures += [
        _decision_figure(tree, X, labels, 'Decision Tree')
        for tree in rf.estimators_[:5]
    ]

    # 2 x 3 grid: forest + two trees on the first row, three trees on the second.
    gp = gridplot([figures[:3], figures[3:]])
    show(gp)
    # Uncomment to also write the grid to disk (the target directory must exist):
    # save(gp, f'./figures/random_forest_moon_depth_{depth}.html')

In [7]:
# Out-of-bag score of `rf`, i.e. of the last forest fitted in the loop above (max_depth=None).
rf.oob_score_

0.966

In [11]:
# Shape of the out-of-bag decision function of `rf`: one row per training sample, one column per class.
rf.oob_decision_function_.shape

(500, 2)

In [8]:
# Importance that `rf` assigns to each of the two input features.
rf.feature_importances_

array([0.47039, 0.52961])