[WIP] joblib 0.12 integration #9486

Closed
wants to merge 3 commits
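
The change that repeats throughout this diff is the switch of the ``n_jobs`` default from ``1`` to ``None``. With joblib 0.12, ``None`` means "defer to the active joblib backend": one process unless the caller opens a ``joblib.parallel_backend`` context. A minimal sketch of that calling pattern, assuming joblib 0.12 is importable directly (in scikit-learn of this era it also ships vendored under ``sklearn.externals.joblib``):

    from joblib import parallel_backend
    from sklearn.cluster import DBSCAN

    X = [[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]

    # n_jobs=None (the new default) runs sequentially on its own ...
    DBSCAN(eps=3, min_samples=2).fit(X)

    # ... but picks up the worker count from an enclosing backend context.
    with parallel_backend('threading', n_jobs=4):
        DBSCAN(eps=3, min_samples=2).fit(X)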
8 changes: 4 additions & 4 deletions doc/modules/compose.rst
@@ -342,7 +342,7 @@ and ``value`` is an estimator object::
>>> estimators = [('linear_pca', PCA()), ('kernel_pca', KernelPCA())]
>>> combined = FeatureUnion(estimators)
>>> combined # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
FeatureUnion(n_jobs=1,
FeatureUnion(n_jobs=None,
transformer_list=[('linear_pca', PCA(copy=True,...)),
('kernel_pca', KernelPCA(alpha=1.0,...))],
transformer_weights=None)
@@ -357,7 +357,7 @@ and ignored by setting to ``None``::

>>> combined.set_params(kernel_pca=None)
... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
FeatureUnion(n_jobs=1,
FeatureUnion(n_jobs=None,
transformer_list=[('linear_pca', PCA(copy=True,...)),
('kernel_pca', None)],
transformer_weights=None)
@@ -423,7 +423,7 @@ By default, the remaining rating columns are ignored (``remainder='drop'``)::
... remainder='drop')

>>> column_trans.fit(X) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
ColumnTransformer(n_jobs=1, remainder='drop', sparse_threshold=0.3,
ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
transformer_weights=None,
transformers=...)

@@ -496,7 +496,7 @@ above example would be::
... ('city', CountVectorizer(analyzer=lambda x: [x])),
... ('title', CountVectorizer()))
>>> column_trans # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
ColumnTransformer(n_jobs=1, remainder='drop', sparse_threshold=0.3,
ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
transformer_weights=None,
transformers=[('countvectorizer-1', ...)

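Beyond the updated reprs, the practical consequence for composite estimators such as ``FeatureUnion`` and ``ColumnTransformer`` is that parallelism is now requested at call time rather than baked into the constructor default. A hedged sketch of that usage (column names and data are made up for illustration; the ``(columns, transformer)`` tuple order follows the doctests above):

    import pandas as pd
    from joblib import parallel_backend
    from sklearn.compose import make_column_transformer
    from sklearn.preprocessing import OneHotEncoder, StandardScaler

    X = pd.DataFrame({'num': [0.0, 1.5, 3.0], 'cat': ['a', 'b', 'a']})
    column_trans = make_column_transformer(
        (['num'], StandardScaler()),
        (['cat'], OneHotEncoder()))

    # With n_jobs left at its new default of None, the transformers run in
    # parallel only inside an explicit joblib backend context.
    with parallel_backend('loky', n_jobs=2):
        column_trans.fit_transform(X)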
3 changes: 1 addition & 2 deletions doc/modules/kernel_approximation.rst
@@ -64,10 +64,9 @@ a linear algorithm, for example a linear SVM::
SGDClassifier(alpha=0.0001, average=False, class_weight=None,
early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=5,
n_iter=None, n_iter_no_change=5, n_jobs=1, penalty='l2',
n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
power_t=0.5, random_state=None, shuffle=True, tol=None,
validation_fraction=0.1, verbose=0, warm_start=False)

>>> clf.score(X_features, y)
1.0

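The hunk above only shows the classifier repr; in the documentation the preceding lines build ``X_features`` with ``RBFSampler``. A self-contained sketch of that pipeline, assuming the small toy dataset the kernel approximation docs use:

    from sklearn.kernel_approximation import RBFSampler
    from sklearn.linear_model import SGDClassifier

    X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    y = [0, 0, 1, 1]

    # Map the data into an approximate RBF feature space, then fit a linear model.
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_features = rbf_feature.fit_transform(X)
    clf = SGDClassifier(max_iter=5).fit(X_features, y)
    clf.score(X_features, y)  # 1.0 in the documented example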
4 changes: 3 additions & 1 deletion doc/modules/linear_model.rst
@@ -45,7 +45,9 @@ and will store the coefficients :math:`w` of the linear model in its
>>> from sklearn import linear_model
>>> reg = linear_model.LinearRegression()
>>> reg.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
... # doctest: +NORMALIZE_WHITESPACE
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
normalize=False)
>>> reg.coef_
array([0.5, 0.5])

2 changes: 1 addition & 1 deletion doc/modules/sgd.rst
@@ -64,7 +64,7 @@ for the training samples::
SGDClassifier(alpha=0.0001, average=False, class_weight=None,
early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=5,
n_iter=None, n_iter_no_change=5, n_jobs=1, penalty='l2',
n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
power_t=0.5, random_state=None, shuffle=True, tol=None,
validation_fraction=0.1, verbose=0, warm_start=False)

2 changes: 1 addition & 1 deletion doc/tutorial/statistical_inference/model_selection.rst
@@ -269,7 +269,7 @@ parameter automatically by cross-validation::
>>> y_diabetes = diabetes.target
>>> lasso.fit(X_diabetes, y_diabetes)
LassoCV(alphas=None, copy_X=True, cv=3, eps=0.001, fit_intercept=True,
max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
max_iter=1000, n_alphas=100, n_jobs=None, normalize=False, positive=False,
precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
verbose=False)
>>> # The estimator chose its lambda automatically:
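The hunk stops right after this comment; in the full tutorial the next line presumably reads the attribute holding the cross-validated choice. A one-line sketch (value elided here, since the hunk does not show it):

    >>> lasso.alpha_  # doctest: +SKIP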
18 changes: 12 additions & 6 deletions doc/tutorial/statistical_inference/supervised_learning.rst
@@ -95,7 +95,7 @@ Scikit-learn documentation for more information about this type of classifier.)
>>> knn = KNeighborsClassifier()
>>> knn.fit(iris_X_train, iris_y_train) # doctest: +NORMALIZE_WHITESPACE
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=1, n_neighbors=5, p=2,
metric_params=None, n_jobs=None, n_neighbors=5, p=2,
weights='uniform')
>>> knn.predict(iris_X_test)
array([1, 2, 1, 0, 0, 0, 2, 1, 2, 0])
@@ -176,13 +176,16 @@ Linear models: :math:`y = X\beta + \epsilon`
>>> from sklearn import linear_model
>>> regr = linear_model.LinearRegression()
>>> regr.fit(diabetes_X_train, diabetes_y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
... # doctest: +NORMALIZE_WHITESPACE
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
normalize=False)
>>> print(regr.coef_)
[ 0.30349955 -237.63931533 510.53060544 327.73698041 -814.13170937
492.81458798 102.84845219 184.60648906 743.51961675 76.09517222]

>>> # The mean square error
>>> np.mean((regr.predict(diabetes_X_test)-diabetes_y_test)**2)# doctest: +ELLIPSIS
>>> np.mean((regr.predict(diabetes_X_test)-diabetes_y_test)**2)
... # doctest: +ELLIPSIS
2004.56760268...

>>> # Explained variance score: 1 is perfect prediction
@@ -257,8 +260,11 @@ diabetes dataset rather than our synthetic data::
>>> from __future__ import print_function
>>> print([regr.set_params(alpha=alpha
... ).fit(diabetes_X_train, diabetes_y_train,
... ).score(diabetes_X_test, diabetes_y_test) for alpha in alphas]) # doctest: +ELLIPSIS
[0.5851110683883..., 0.5852073015444..., 0.5854677540698..., 0.5855512036503..., 0.5830717085554..., 0.57058999437...]
... ).score(diabetes_X_test, diabetes_y_test)
... for alpha in alphas])
... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
[0.5851110683883..., 0.5852073015444..., 0.5854677540698...,
0.5855512036503..., 0.5830717085554..., 0.57058999437...]
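
The list comprehension above is a hand-rolled sweep over ``alphas``; scikit-learn also ships :class:`RidgeCV`, which performs the same kind of selection internally. A sketch, assuming ``alphas`` and the ``diabetes_*`` arrays are the ones defined earlier in this tutorial:

    from sklearn import linear_model

    # RidgeCV runs the alpha search internally instead of the explicit loop above.
    regr_cv = linear_model.RidgeCV(alphas=alphas)
    regr_cv.fit(diabetes_X_train, diabetes_y_train)
    regr_cv.alpha_  # the regularization strength RidgeCV settled on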


.. note::
@@ -372,7 +378,7 @@ function or **logistic** function:
>>> logistic.fit(iris_X_train, iris_y_train)
LogisticRegression(C=100000.0, class_weight=None, dual=False,
fit_intercept=True, intercept_scaling=1, max_iter=100,
multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
multi_class='ovr', n_jobs=None, penalty='l2', random_state=None,
solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

This is known as :class:`LogisticRegression`.
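
The logistic (sigmoid) function referred to above is :math:`\sigma(t) = \frac{1}{1 + e^{-t}}`, which squashes the linear score into the interval (0, 1). A short numpy illustration (not part of the tutorial itself):

    import numpy as np

    def sigmoid(t):
        return 1.0 / (1.0 + np.exp(-t))

    sigmoid(np.array([-3.0, 0.0, 3.0]))  # array([0.047..., 0.5, 0.952...])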
10 changes: 5 additions & 5 deletions sklearn/cluster/bicluster.py
@@ -93,7 +93,7 @@ class BaseSpectral(six.with_metaclass(ABCMeta, BaseEstimator,
@abstractmethod
def __init__(self, n_clusters=3, svd_method="randomized",
n_svd_vecs=None, mini_batch=False, init="k-means++",
n_init=10, n_jobs=1, random_state=None):
n_init=10, n_jobs=None, random_state=None):
self.n_clusters = n_clusters
self.svd_method = svd_method
self.n_svd_vecs = n_svd_vecs
@@ -271,7 +271,7 @@ class SpectralCoclustering(BaseSpectral):
array([0, 0], dtype=int32)
>>> clustering # doctest: +NORMALIZE_WHITESPACE
SpectralCoclustering(init='k-means++', mini_batch=False, n_clusters=2,
n_init=10, n_jobs=1, n_svd_vecs=None, random_state=0,
n_init=10, n_jobs=None, n_svd_vecs=None, random_state=0,
svd_method='randomized')

References
@@ -284,7 +284,7 @@ class SpectralCoclustering(BaseSpectral):
"""
def __init__(self, n_clusters=3, svd_method='randomized',
n_svd_vecs=None, mini_batch=False, init='k-means++',
n_init=10, n_jobs=1, random_state=None):
n_init=10, n_jobs=None, random_state=None):
super(SpectralCoclustering, self).__init__(n_clusters,
svd_method,
n_svd_vecs,
@@ -419,7 +419,7 @@ class SpectralBiclustering(BaseSpectral):
>>> clustering # doctest: +NORMALIZE_WHITESPACE
SpectralBiclustering(init='k-means++', method='bistochastic',
mini_batch=False, n_best=3, n_clusters=2, n_components=6,
n_init=10, n_jobs=1, n_svd_vecs=None, random_state=0,
n_init=10, n_jobs=None, n_svd_vecs=None, random_state=0,
svd_method='randomized')

References
@@ -433,7 +433,7 @@ class SpectralBiclustering(BaseSpectral):
def __init__(self, n_clusters=3, method='bistochastic',
n_components=6, n_best=3, svd_method='randomized',
n_svd_vecs=None, mini_batch=False, init='k-means++',
n_init=10, n_jobs=1, random_state=None):
n_init=10, n_jobs=None, random_state=None):
super(SpectralBiclustering, self).__init__(n_clusters,
svd_method,
n_svd_vecs,
7 changes: 4 additions & 3 deletions sklearn/cluster/dbscan_.py
@@ -20,7 +20,8 @@


def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
algorithm='auto', leaf_size=30, p=2, sample_weight=None, n_jobs=1):
algorithm='auto', leaf_size=30, p=2, sample_weight=None,
n_jobs=None):
"""Perform DBSCAN clustering from vector array or distance matrix.

Read more in the :ref:`User Guide <dbscan>`.
@@ -255,7 +256,7 @@ class DBSCAN(BaseEstimator, ClusterMixin):
array([ 0, 0, 0, 1, 1, -1])
>>> clustering # doctest: +NORMALIZE_WHITESPACE
DBSCAN(algorithm='auto', eps=3, leaf_size=30, metric='euclidean',
metric_params=None, min_samples=2, n_jobs=1, p=None)
metric_params=None, min_samples=2, n_jobs=None, p=None)

See also
--------
@@ -296,7 +297,7 @@ class DBSCAN(BaseEstimator, ClusterMixin):

def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
metric_params=None, algorithm='auto', leaf_size=30, p=None,
n_jobs=1):
n_jobs=None):
self.eps = eps
self.min_samples = min_samples
self.metric = metric
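Both entry points in this module, the ``dbscan`` function and the ``DBSCAN`` estimator, now share the ``n_jobs=None`` default. A small sanity-check sketch on the toy data from the docstring (illustrative, not part of the diff):

    import numpy as np
    from sklearn.cluster import DBSCAN, dbscan

    X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])

    # The function returns (core_sample_indices, labels); the estimator exposes labels_.
    core_samples, labels_fn = dbscan(X, eps=3, min_samples=2)
    labels_est = DBSCAN(eps=3, min_samples=2).fit(X).labels_
    assert np.array_equal(labels_fn, labels_est)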
11 changes: 6 additions & 5 deletions sklearn/cluster/k_means_.py
@@ -25,12 +25,13 @@
from ..utils.sparsefuncs import mean_variance_axis
from ..utils.validation import _num_samples
from ..utils import check_array
from ..utils import check_random_state
from ..utils import gen_batches
from ..utils import check_random_state
from ..utils.validation import check_is_fitted
from ..utils.validation import FLOAT_DTYPES
from ..utils import Parallel
from ..utils import delayed
from ..utils import effective_n_jobs
from ..externals.six import string_types
from ..exceptions import ConvergenceWarning
from . import _k_means
@@ -184,8 +185,8 @@ def _check_sample_weight(X, sample_weight):

def k_means(X, n_clusters, sample_weight=None, init='k-means++',
precompute_distances='auto', n_init=10, max_iter=300,
verbose=False, tol=1e-4, random_state=None, copy_x=True, n_jobs=1,
algorithm="auto", return_n_iter=False):
verbose=False, tol=1e-4, random_state=None, copy_x=True,
n_jobs=None, algorithm="auto", return_n_iter=False):
"""K-means clustering algorithm.

Read more in the :ref:`User Guide <k_means>`.
@@ -368,7 +369,7 @@ def k_means(X, n_clusters, sample_weight=None, init='k-means++',
else:
raise ValueError("Algorithm must be 'auto', 'full' or 'elkan', got"
" %s" % str(algorithm))
if n_jobs == 1:
if effective_n_jobs(n_jobs) == 1:
# For a single thread, less memory is needed if we just store one set
# of the best results (as opposed to one set per run per thread).
for it in range(n_init):
@@ -913,7 +914,7 @@ class KMeans(BaseEstimator, ClusterMixin, TransformerMixin):
def __init__(self, n_clusters=8, init='k-means++', n_init=10,
max_iter=300, tol=1e-4, precompute_distances='auto',
verbose=0, random_state=None, copy_x=True,
n_jobs=1, algorithm='auto'):
n_jobs=None, algorithm='auto'):

self.n_clusters = n_clusters
self.init = init
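The subtle part of the ``k_means`` hunk above is the dispatch between the sequential low-memory loop and the parallel path: ``effective_n_jobs`` resolves ``None`` and negative values to a concrete worker count, and the sequential branch is meant to be taken only when that count is 1. A sketch of the resolution being relied on, assuming ``effective_n_jobs`` is importable from joblib 0.12 (this PR also re-exports it from ``sklearn.utils``):

    from joblib import effective_n_jobs

    # Outside any parallel_backend context: None -> 1, -1 -> the CPU count.
    for n_jobs in (None, 1, 2, -1):
        print(n_jobs, '->', effective_n_jobs(n_jobs))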
8 changes: 4 additions & 4 deletions sklearn/cluster/mean_shift_.py
@@ -29,7 +29,7 @@


def estimate_bandwidth(X, quantile=0.3, n_samples=None, random_state=0,
n_jobs=1):
n_jobs=None):
"""Estimate the bandwidth to use with the mean-shift algorithm.

Note that this function takes time at least quadratic in n_samples. For large
@@ -107,7 +107,7 @@ def _mean_shift_single_seed(my_mean, X, nbrs, max_iter):

def mean_shift(X, bandwidth=None, seeds=None, bin_seeding=False,
min_bin_freq=1, cluster_all=True, max_iter=300,
n_jobs=1):
n_jobs=None):
"""Perform mean shift clustering of data using a flat kernel.

Read more in the :ref:`User Guide <mean_shift>`.
@@ -364,7 +364,7 @@ class MeanShift(BaseEstimator, ClusterMixin):
array([0, 1])
>>> clustering # doctest: +NORMALIZE_WHITESPACE
MeanShift(bandwidth=2, bin_seeding=False, cluster_all=True, min_bin_freq=1,
n_jobs=1, seeds=None)
n_jobs=None, seeds=None)

Notes
-----
@@ -392,7 +392,7 @@ class MeanShift(BaseEstimator, ClusterMixin):

"""
def __init__(self, bandwidth=None, seeds=None, bin_seeding=False,
min_bin_freq=1, cluster_all=True, n_jobs=1):
min_bin_freq=1, cluster_all=True, n_jobs=None):
self.bandwidth = bandwidth
self.seeds = seeds
self.bin_seeding = bin_seeding
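``estimate_bandwidth`` and ``MeanShift`` are intended to be used together: estimate a bandwidth from the data (or a subsample, since the estimate is at least quadratic in ``n_samples``), then hand it to the estimator. A sketch of that workflow on the toy data from the ``MeanShift`` docstring (illustrative only):

    import numpy as np
    from sklearn.cluster import MeanShift, estimate_bandwidth

    X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])

    bandwidth = estimate_bandwidth(X, quantile=0.5)
    clustering = MeanShift(bandwidth=bandwidth).fit(X)
    clustering.labels_  # expected to recover the same two clusters as the docstring example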
6 changes: 3 additions & 3 deletions sklearn/cluster/optics_.py
@@ -26,7 +26,7 @@ def optics(X, min_samples=5, max_bound=np.inf, metric='euclidean',
rejection_ratio=.7, similarity_threshold=0.4,
significant_min=.003, min_cluster_size_ratio=.005,
min_maxima_ratio=0.001, algorithm='ball_tree',
leaf_size=30, n_jobs=1):
leaf_size=30, n_jobs=None):
"""Perform OPTICS clustering from vector array

OPTICS: Ordering Points To Identify the Clustering Structure
@@ -285,7 +285,7 @@ def __init__(self, min_samples=5, max_bound=np.inf, metric='euclidean',
rejection_ratio=.7, similarity_threshold=0.4,
significant_min=.003, min_cluster_size_ratio=.005,
min_maxima_ratio=0.001, algorithm='ball_tree',
leaf_size=30, n_jobs=1):
leaf_size=30, n_jobs=None):

self.max_bound = max_bound
self.min_samples = min_samples
@@ -394,7 +394,7 @@ def _set_reach_dist(self, point_index, X, nbrs):
# Keep n_jobs = 1 in the following lines...please
if len(unproc) > 0:
dists = pairwise_distances(P, np.take(X, unproc, axis=0),
self.metric, n_jobs=1).ravel()
self.metric, n_jobs=None).ravel()

rdists = np.maximum(dists, self.core_distances_[point_index])
new_reach = np.minimum(np.take(self.reachability_, unproc), rdists)
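Note that the hunk above changes the ``pairwise_distances`` call to ``n_jobs=None`` while keeping the "Keep n_jobs = 1" comment: under the joblib 0.12 semantics the two coincide only outside a ``parallel_backend`` context, since ``None`` defers to whatever that context requests. A sketch of how the call resolves by default (illustrative data):

    import numpy as np
    from sklearn.metrics import pairwise_distances

    P = np.array([[0.0, 0.0]])
    X = np.array([[1.0, 1.0], [2.0, 2.0]])

    # n_jobs=None resolves to a single job here because no backend context is active.
    pairwise_distances(P, X, metric='euclidean', n_jobs=None).ravel()
    # array([1.414..., 2.828...])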
4 changes: 2 additions & 2 deletions sklearn/cluster/spectral.py
@@ -385,7 +385,7 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
>>> clustering # doctest: +NORMALIZE_WHITESPACE
SpectralClustering(affinity='rbf', assign_labels='discretize', coef0=1,
degree=3, eigen_solver=None, eigen_tol=0.0, gamma=1.0,
kernel_params=None, n_clusters=2, n_init=10, n_jobs=1,
kernel_params=None, n_clusters=2, n_init=10, n_jobs=None,
n_neighbors=10, random_state=0)

Notes
@@ -426,7 +426,7 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
def __init__(self, n_clusters=8, eigen_solver=None, random_state=None,
n_init=10, gamma=1., affinity='rbf', n_neighbors=10,
eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1,
kernel_params=None, n_jobs=1):
kernel_params=None, n_jobs=None):
self.n_clusters = n_clusters
self.eigen_solver = eigen_solver
self.random_state = random_state
6 changes: 3 additions & 3 deletions sklearn/compose/_column_transformer.py
@@ -157,7 +157,7 @@ class ColumnTransformer(_BaseComposition, TransformerMixin):
"""

def __init__(self, transformers, remainder='drop', sparse_threshold=0.3,
n_jobs=1, transformer_weights=None):
n_jobs=None, transformer_weights=None):
self.transformers = transformers
self.remainder = remainder
self.sparse_threshold = sparse_threshold
@@ -687,7 +687,7 @@ def make_column_transformer(*transformers, **kwargs):
... (['numerical_column'], StandardScaler()),
... (['categorical_column'], OneHotEncoder()))
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
ColumnTransformer(n_jobs=1, remainder='drop', sparse_threshold=0.3,
ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
transformer_weights=None,
transformers=[('standardscaler',
StandardScaler(...),
@@ -697,7 +697,7 @@ def make_column_transformer(*transformers, **kwargs):
['categorical_column'])])

"""
n_jobs = kwargs.pop('n_jobs', 1)
n_jobs = kwargs.pop('n_jobs', None)
remainder = kwargs.pop('remainder', 'drop')
if kwargs:
raise TypeError('Unknown keyword arguments: "{}"'
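Because ``make_column_transformer`` accepts transformers positionally (``*transformers``), its options arrive via ``**kwargs`` and are popped one by one, with anything left over raising the ``TypeError`` shown above. A short usage sketch (column selections are made up for illustration):

    from sklearn.compose import make_column_transformer
    from sklearn.preprocessing import StandardScaler

    # Recognised keywords such as remainder (and now n_jobs, defaulting to None) are popped off.
    ct = make_column_transformer((['num'], StandardScaler()), remainder='passthrough')

    # Anything unrecognised falls through to the TypeError in the snippet above.
    try:
        make_column_transformer((['num'], StandardScaler()), n_iter=5)
    except TypeError as exc:
        print(exc)  # Unknown keyword arguments: "n_iter"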
9 changes: 5 additions & 4 deletions sklearn/compose/tests/test_column_transformer.py
@@ -458,7 +458,7 @@ def test_column_transformer_get_set_params():
ct = ColumnTransformer([('trans1', StandardScaler(), [0]),
('trans2', StandardScaler(), [1])])

exp = {'n_jobs': 1,
exp = {'n_jobs': None,
'remainder': 'drop',
'sparse_threshold': 0.3,
'trans1': ct.transformers[0][1],
@@ -478,7 +478,7 @@ def test_column_transformer_get_set_params():
assert_false(ct.get_params()['trans1__with_mean'])

ct.set_params(trans1='passthrough')
exp = {'n_jobs': 1,
exp = {'n_jobs': None,
'remainder': 'drop',
'sparse_threshold': 0.3,
'trans1': 'passthrough',
@@ -662,6 +662,7 @@ def test_column_transformer_remainder():
ct = make_column_transformer(([0], Trans()))
assert ct.remainder == 'drop'


@pytest.mark.parametrize("key", [[0], np.array([0]), slice(0, 1),
np.array([True, False])])
def test_column_transformer_remainder_numpy(key):
@@ -806,7 +807,7 @@ def test_column_transformer_get_set_params_with_remainder():
ct = ColumnTransformer([('trans1', StandardScaler(), [0])],
remainder=StandardScaler())

exp = {'n_jobs': 1,
exp = {'n_jobs': None,
'remainder': ct.remainder,
'remainder__copy': True,
'remainder__with_mean': True,
@@ -825,7 +826,7 @@ def test_column_transformer_get_set_params_with_remainder():
assert not ct.get_params()['remainder__with_std']

ct.set_params(trans1='passthrough')
exp = {'n_jobs': 1,
exp = {'n_jobs': None,
'remainder': ct.remainder,
'remainder__copy': True,
'remainder__with_mean': True,