In [9]:
from distributed import Executor, progress

In [10]:
# Connect to the distributed scheduler at dscheduler:8786. `e` is the handle
# used further down to run a function through the cluster and to restart it.
e = Executor('dscheduler:8786')

In [12]:
import subprocess


def install_libs():
    """Install the packages this notebook depends on.

    Two commands are run in order, each through ``subprocess.check_call``:

    1. ``conda install -yq scikit-learn pandas matplotlib``
    2. ``pip install git+https://github.com/joblib/joblib``

    ``check_call`` raises ``subprocess.CalledProcessError`` when a command
    exits with a non-zero status, so a failed conda install prevents the
    pip step from running.
    """
    conda_cmd = ['conda', 'install', '-yq',
                 'scikit-learn', 'pandas', 'matplotlib']
    pip_cmd = ['pip', 'install', 'git+https://github.com/joblib/joblib']
    for argv in (conda_cmd, pip_cmd):
        subprocess.check_call(argv)

In [15]:
# Install the dependencies in the local (notebook) environment first...
install_libs()
# ...then run the same installer through the executor so the cluster side
# gets the same packages (presumably on every worker -- confirm against the
# distributed `Executor.run` documentation).
e.run(install_libs)
# Restart the executor, presumably so the freshly installed packages are
# picked up. The value returned here is what the cell displays.
e.restart()

<Executor: scheduler=10.0.0.2:8786 workers=1 threads=12>

In [14]:
# Force scikit-learn to use the development branch of joblib. This will no
# longer be necessary once scikit-learn's embedded copy is in sync with
# joblib 0.10+.
import joblib
from sklearn.externals import joblib as skl_joblib
print('Monkeypatching scikit-learn embedded joblib')
# Copy every module-level attribute of the standalone joblib package onto
# the copy bundled with scikit-learn, so lookups made through
# sklearn.externals.joblib resolve to the standalone implementation.
for attr_name in list(vars(joblib)):
    setattr(skl_joblib, attr_name, getattr(joblib, attr_name))

Monkeypatching scikit-learn embedded joblib


In [18]:
from distributed_joblib_backend import DistributedBackend
from joblib.parallel import register_parallel_backend, parallel_backend
from joblib import Parallel, delayed

# Make DistributedBackend selectable under the name 'distributed', which is
# the name passed to joblib's parallel_backend(...) context manager when the
# search is fitted.
register_parallel_backend('distributed', DistributedBackend)

In [32]:
from sklearn.datasets import load_digits
from sklearn.grid_search import RandomizedSearchCV
from sklearn.svm import SVC
import numpy as np

# Dataset used for the hyper-parameter search below.
digits = load_digits()

# Candidate values the randomized search samples from.
param_space = {
    'C': np.logspace(-6, 6, 13),      # 1e-6 ... 1e6, one value per decade
    'gamma': np.logspace(-8, 8, 17),  # 1e-8 ... 1e8, one value per decade
    'tol': np.logspace(-4, -1, 4),    # 1e-4 ... 1e-1, one value per decade
    'class_weight': [None, 'balanced'],
}

model = SVC(kernel='rbf')
# random_state fixes which 50 candidates are drawn from param_space, so the
# search samples the same parameter combinations on every run. Without it a
# re-run explores a different random subset and results are not comparable.
search = RandomizedSearchCV(model, param_space, cv=3, n_iter=50, verbose=10,
                            random_state=0)

In [33]:
# Fit the search while joblib's 'distributed' backend is active, pointing the
# backend at the scheduler address. The backend selection only applies
# inside this `with` block.
with parallel_backend('distributed', scheduler_host='dscheduler:8786'):
    search.fit(digits.data, digits.target)

# Last expression of the cell: display the best parameter combination found
# and its score as a (params, score) tuple.
search.best_params_, search.best_score_

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed:   10.5s
[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed:   12.8s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:   15.4s
[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed:   20.0s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   24.0s finished


({'C': 1.0,
  'class_weight': 'balanced',
  'gamma': 0.001,
  'tol': 0.10000000000000001},
 0.97440178074568728)