In [1]:
import numpy as np
from bokeh.plotting import show
from bokeh.layouts import gridplot
from soydata.data.classification import make_moons
from soydata.visualize import scatterplot
from soydata.visualize import use_notebook

use_notebook()

X_twomoon, y_twomoon = make_moons(n_samples=1000, xy_ratio=2.0, x_gap=-0.2, y_gap=0.2, noise=0.1)
p = scatterplot(X_twomoon, labels=y_twomoon, height=400, width=400, size=3, title='Two moon')

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

def train_and_compare(X, labels):
    names = ['knn k=5', 'knn k=10', 'knn k=20', 'Random Forest (depth=10)',
             'Boostring (estimators=50)', 'Neuralnet h=(20,5)']

    models = [
        KNeighborsClassifier(n_neighbors=5, weights='distance', metric='euclidean', algorithm='ball_tree'),
        KNeighborsClassifier(n_neighbors=10, weights='distance', metric='euclidean', algorithm='ball_tree'),
        KNeighborsClassifier(n_neighbors=20, weights='distance', metric='euclidean', algorithm='ball_tree'),
        RandomForestClassifier(max_depth=10, random_state=0),
        GradientBoostingClassifier(n_estimators=50, random_state=0),
        MLPClassifier(hidden_layer_sizes=(20,5), random_state=0)
    ]

    figures = []
    for name, model in zip(names, models):
        model.fit(X, labels)
        predicted = model.predict(X)
        accuracy = (labels == predicted).mean()
        # color wrong prediction point with grey
        predicted[np.where(labels != predicted)[0]] = -1
        p = scatterplot(X, labels=predicted, height=400, width=400, size=3,
                        title=f'{name}, accuracy={accuracy:.4}', show_inline=False)
        figures.append(p)

    grid = [figures[:3], figures[3:]]
    gp = gridplot(grid)
    return gp

gp_twomoon = train_and_compare(X_twomoon, y_twomoon)
show(gp_twomoon)


The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.



In [3]:
from soydata.data.classification import make_spiral

X_spiral, y_spiral = make_spiral(n_samples_per_class=1000, n_classes=3,
    n_rotations=2.5, gap_between_spiral=0.1, noise=0.2,
    gap_between_start_point=0.1, equal_interval=True)
p = scatterplot(X_spiral, labels=y_spiral, title='Spiral')

In [4]:
gp_spiral = train_and_compare(X_spiral, y_spiral)
show(gp_spiral)


The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.



In [5]:
from soydata.data.supervised import make_complex_rectangulars

X_rec, y_rec = make_complex_rectangulars(n_samples=3000, n_classes=3,
    n_rectangulars=20, volume=0.5, seed=0)
X_rec -= 0.5
p = scatterplot(X_rec, labels=y_rec, title='Complex rectangulars (3 classes)', size=3)

In [6]:
gp_rec = train_and_compare(X_rec, y_rec)
show(gp_rec)


The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.

