In [1]:
import sys

In [2]:
# Put the parent directory on the import path (presumably where the
# `monitoring` package imported in the next cell lives — confirm).
sys.path.append('..')

In [3]:
from monitoring.utils import cv_curve_and_table_for_source, log10uniform

In [4]:
import warnings
warnings.filterwarnings('ignore')

from time import time

from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.model_selection import ParameterSampler, check_cv

from bokeh.io import output_notebook, push_notebook
import bokeh.plotting as bp
import bokeh.models as bm

In [5]:
import bokeh.models as bm
import bokeh.plotting as bp
import bokeh.layouts as bl
import numpy as np
import pandas as pd

In [None]:
# Set up Bokeh for inline notebook output; must run before any `show` call.
output_notebook()

In [7]:
# Load the scikit-learn digits dataset and split it into the feature
# matrix and the target labels used by the search below.
digits = load_digits()
X = digits.data
y = digits.target

In [8]:
# Estimator to tune: an SVC with an RBF kernel.
model = SVC(kernel='rbf')

# Search space sampled by the random search. `log10uniform` comes from
# monitoring.utils; presumably log-uniform over 10**-4 .. 10**4 — confirm
# against its definition.
param_dist = dict(
    C=log10uniform(-4, 4),
    gamma=log10uniform(-4, 4),
)

Since we need access to the mean scores while the search is still running, we write our own loop over the sampled parameters.

In [21]:
def fit_and_score(est, X_train, y_train, X_valid, y_valid, params):
    """Configure, fit and score an estimator on one train/validation split.

    Parameters
    ----------
    est : estimator
        Object exposing ``set_params``, ``fit`` and ``score``. It is
        reconfigured and re-fitted in place.
    X_train, y_train
        Data the estimator is fitted on (and scored on for the first score).
    X_valid, y_valid
        Held-out data used for the second score.
    params : dict
        Keyword arguments forwarded to ``est.set_params``.

    Returns
    -------
    tuple
        ``(train_score, test_score)`` as returned by ``est.score``.
    """
    est.set_params(**params)
    est.fit(X_train, y_train)
    # Training score is computed first, then the validation score.
    return est.score(X_train, y_train), est.score(X_valid, y_valid)

def _mean_for_scores(train_test_scores):
    train_score, test_score = tuple(np.mean(train_test_scores, 0))
    return train_score, test_score

def evaluate(est, X, y, params, cv):
    """Cross-validate ``est`` with ``params`` and return the mean scores.

    Parameters
    ----------
    est : estimator
        Estimator passed on to ``fit_and_score``, which reconfigures and
        re-fits it in place for every fold.
    X, y
        Full data set; both are indexed with the index arrays produced by
        ``cv.split``.
    params : dict
        Parameter setting to evaluate.
    cv : cross-validation splitter
        Object whose ``split(X, y)`` yields ``(train_indices, test_indices)``.

    Returns
    -------
    tuple
        ``(mean_train_score, mean_test_score)`` averaged over all folds.
    """
    scores = []
    for train_inds, test_inds in cv.split(X, y):
        # Use the estimator handed in by the caller, not the notebook-global
        # `model`, so the function works for any estimator and does not
        # depend on hidden kernel state.
        fold_scores = fit_and_score(
            est,
            X[train_inds], y[train_inds],
            X[test_inds], y[test_inds],
            params,
        )
        scores.append(fold_scores)
    return _mean_for_scores(scores)

We define helpers that build a plot of the cross-validation curve and a data table from a Bokeh data source:

In [11]:
def cv_curve_for_source(source):
    """Build the cross-validation curve figure backed by ``source``.

    Two series are drawn against the ``index`` column, each as a line with
    point markers: ``mean_test_score`` (semi-transparent, default colour)
    and ``cummax_score`` (orange). Returns the Bokeh figure.
    """
    fig = bp.figure(
        plot_width=900,
        plot_height=300,
        x_axis_type='linear',
        x_axis_label='iteration',
        y_axis_label='mean cv score',
        tools="pan,wheel_zoom,box_select,reset",
        logo=None,
    )
    fig.grid.visible = False

    # (column name, styling shared by that series' line and markers)
    series = (
        ('mean_test_score', {'alpha': 0.5}),
        ('cummax_score', {'color': 'orange'}),
    )
    for column, look in series:
        fig.line('index', column, source=source, line_width=1.5,
                 line_join='round', **look)
        fig.scatter('index', column, source=source, size=3, **look)
    return fig

In [12]:
def datatable_for_source(source):
    """Build a Bokeh ``DataTable`` showing the columns of ``source``.

    Every column except ``index`` and ``tstamp`` gets a plain table column
    titled after the field; a date-formatted ``tstamp`` column is appended
    last. Returns the table widget.
    """
    plain_fields = [
        name for name in source.column_names
        if name not in ('index', 'tstamp')
    ]
    columns = [bm.TableColumn(field=name, title=name) for name in plain_fields]
    # The timestamp column goes last and is rendered as a date/time string.
    columns.append(
        bm.TableColumn(
            field='tstamp',
            title='tstamp',
            formatter=bm.DateFormatter(format="%Y-%m-%d %T"),
        )
    )
    return bm.DataTable(source=source, columns=columns, width=900, height=300)

In [13]:
def cv_curve_and_table_for_source(source):
    """Stack the CV-curve figure above the data table, both backed by ``source``.

    NOTE(review): a function of the same name is also imported from
    ``monitoring.utils`` at the top of the notebook; this definition
    shadows it.
    """
    curve = cv_curve_for_source(source)
    table = datatable_for_source(source)
    return bl.column([curve, table])

Finally, we render our visualisation and run our search:

In [24]:
def random_search_cv(est, X, y, param_dist, cv=3, n_iter=100, random_state=None):
    """RandomizedSearchCV with monitoring.

    Samples ``n_iter`` parameter settings from ``param_dist``, cross-validates
    each one with ``evaluate`` and streams the result to a live Bokeh plot
    and table shown in the notebook.

    Parameters
    ----------
    est : estimator
        Estimator to tune; forwarded to ``evaluate``.
    X, y
        Data used for cross-validation.
    param_dist : dict
        Parameter distributions handed to ``ParameterSampler``.
    cv : int or cross-validation splitter, default 3
        Normalised with ``check_cv``.
    n_iter : int, default 100
        Number of parameter settings to sample.
    random_state : optional, default None
        Forwarded to ``ParameterSampler`` so a run can be made reproducible.
        ``None`` keeps the previous, unseeded behaviour.

    Returns
    -------
    None
        Results are only available through the rendered plot and table.
    """
    cv = check_cv(cv)
    source = bm.ColumnDataSource(pd.DataFrame({
        'tstamp': [],
        'cummax_score': [],
        'mean_test_score': [],
        'params': np.array([], dtype=str),
    }))
    layout = cv_curve_and_table_for_source(source)
    h = bp.show(layout, notebook_handle=True)
    sampler = ParameterSampler(
        param_dist, n_iter=n_iter, random_state=random_state
    )

    # Track the running maximum directly instead of re-scanning the whole
    # 'cummax_score' column on every iteration.
    best_score = -np.inf
    for iteration, params in enumerate(sampler):
        _, mean_test_score = evaluate(est, X, y, params, cv)
        best_score = max(best_score, mean_test_score)
        source.stream({
            'index': [iteration],
            'mean_test_score': [mean_test_score],
            'cummax_score': [best_score],
            # `pd.datetime` was deprecated in pandas 1.0 and later removed;
            # Timestamp.now() gives the same naive local time.
            'tstamp': [pd.Timestamp.now()],
            'params': [str(params)],
        })
        # The handle must be passed by keyword: the first positional
        # parameter of push_notebook is the document, not the handle.
        push_notebook(handle=h)

In [1]:
# Run the monitored random search for the RBF SVC on the digits data,
# using the function's defaults (cv=3, n_iter=100).
random_search_cv(model, X, y, param_dist)