Merge branch 'master' of https://github.com/scikit-learn/scikit-learn into searchcv

thechargedneutron committed Sep 2, 2017
2 parents cb0cabc + 233a3e5 commit ee82d80
Showing 38 changed files with 307 additions and 119 deletions.
12 changes: 12 additions & 0 deletions doc/whats_new.rst
@@ -43,6 +43,18 @@ Classifiers and regressors
Enhancements
............

Classifiers and regressors

- In :class:`gaussian_process.GaussianProcessRegressor`, the ``predict``
  method is faster when using ``return_std=True``, in particular when it is
  called several times in a row (see the sketch after this list).
  :issue:`9234` by :user:`andrewww <andrewww>`
  and :user:`Minghui Liu <minghui-liu>`.

- Add a ``named_estimators_`` attribute to
  :class:`sklearn.ensemble.VotingClassifier` to access fitted
  estimators by name (see the sketch after this list).
  :issue:`9157` by :user:`Herilalaina Rakotoarison <herilalaina>`.

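A minimal sketch of the call pattern the GaussianProcessRegressor entry refers
to; the synthetic data and the default kernel below are illustrative, not taken
from the commit:

import numpy as np

from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(50, 1))
y = np.sin(X).ravel() + 0.1 * rng.randn(50)

gpr = GaussianProcessRegressor().fit(X, y)

# Repeated calls with return_std=True are the case the entry above says is
# faster, especially when made several times in a row.
for _ in range(5):
    y_mean, y_std = gpr.predict(X, return_std=True)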

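And a short illustration of the new ``named_estimators_`` access path; the
dataset and sub-estimators here are placeholders chosen for brevity:

from sklearn.datasets import load_iris
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)

voter = VotingClassifier(estimators=[
    ('lr', LogisticRegression()),
    ('tree', DecisionTreeClassifier(random_state=0)),
]).fit(X, y)

# Each fitted sub-estimator is reachable by the name given in `estimators`.
print(voter.named_estimators_['lr'].coef_.shape)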
Model evaluation and meta-estimators

- A scorer based on :func:`metrics.brier_score_loss` is also available.
15 changes: 11 additions & 4 deletions examples/ensemble/plot_bias_variance.py
@@ -88,12 +88,14 @@

n_estimators = len(estimators)


# Generate data
def f(x):
x = x.ravel()

return np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2)


def generate(n_samples, noise, n_repeat=1):
X = np.random.rand(n_samples) * 10 - 5
X = np.sort(X)
@@ -110,6 +112,7 @@ def generate(n_samples, noise, n_repeat=1):

return X, y


X_train = []
y_train = []

@@ -120,6 +123,8 @@ def generate(n_samples, noise, n_repeat=1):

X_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat)

plt.figure(figsize=(10, 8))

# Loop over estimators to compare
for n, (name, estimator) in enumerate(estimators):
# Compute predictions
@@ -166,8 +171,8 @@ def generate(n_samples, noise, n_repeat=1):
plt.xlim([-5, 5])
plt.title(name)

if n == 0:
plt.legend(loc="upper left", prop={"size": 11})
if n == n_estimators - 1:
plt.legend(loc=(1.1, .5))

plt.subplot(2, n_estimators, n_estimators + n + 1)
plt.plot(X_test, y_error, "r", label="$error(x)$")
@@ -178,7 +183,9 @@ def generate(n_samples, noise, n_repeat=1):
plt.xlim([-5, 5])
plt.ylim([0, 0.1])

if n == 0:
plt.legend(loc="upper left", prop={"size": 11})
if n == n_estimators - 1:

plt.legend(loc=(1.1, .5))

plt.subplots_adjust(right=.75)
plt.show()
@@ -65,7 +65,8 @@

print("Iteration %i %s" % (i, 70 * "_"))
print("Label Spreading model: %d labeled & %d unlabeled (%d total)"
% (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))
% (n_labeled_points, n_total_samples - n_labeled_points,
n_total_samples))

print(classification_report(true_labels, predicted_labels))

@@ -95,7 +96,7 @@
# for more than 5 iterations, visualize the gain only on the first 5
if i < 5:
sub = f.add_subplot(5, 5, index + 1 + (5 * i))
sub.imshow(image, cmap=plt.cm.gray_r)
sub.imshow(image, cmap=plt.cm.gray_r, interpolation='none')
sub.set_title("predict: %i\ntrue: %i" % (
lp_model.transduction_[image_index], y[image_index]), size=10)
sub.axis('off')
@@ -108,6 +109,7 @@
n_labeled_points += len(uncertainty_index)

f.suptitle("Active learning with Label Propagation.\nRows show 5 most "
"uncertain labels to learn with the next model.")
plt.subplots_adjust(0.12, 0.03, 0.9, 0.8, 0.2, 0.45)
"uncertain labels to learn with the next model.", y=1.15)
plt.subplots_adjust(left=0.2, bottom=0.03, right=0.9, top=0.9, wspace=0.2,
hspace=0.85)
plt.show()
4 changes: 2 additions & 2 deletions sklearn/cluster/hierarchical.py
@@ -609,7 +609,7 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin):
"manhattan", "cosine", or 'precomputed'.
If linkage is "ward", only "euclidean" is accepted.
memory : joblib.Memory-like or string, optional
memory : None, str or object with the joblib.Memory interface, optional
Used to cache the output of the computation of the tree.
By default, no caching is done. If a string is given, it is the
path to the caching directory.
@@ -769,7 +769,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
"manhattan", "cosine", or 'precomputed'.
If linkage is "ward", only "euclidean" is accepted.
memory : joblib.Memory-like or string, optional
memory : None, str or object with the joblib.Memory interface, optional
Used to cache the output of the computation of the tree.
By default, no caching is done. If a string is given, it is the
path to the caching directory.
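The reworded ``memory`` description applies to both AgglomerativeClustering and
FeatureAgglomeration. A minimal sketch of the caching behaviour it documents,
assuming a throwaway temporary directory and synthetic blobs:

from tempfile import mkdtemp

from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=200, centers=4, random_state=0)

# A string is interpreted as a caching directory; an object with the
# joblib.Memory interface works as well. The tree computation is cached,
# so re-clustering with a different n_clusters avoids recomputing it.
cache_dir = mkdtemp()
for k in (2, 3, 4):
    labels = AgglomerativeClustering(n_clusters=k, memory=cache_dir).fit_predict(X)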
18 changes: 8 additions & 10 deletions sklearn/datasets/tests/test_lfw.py
@@ -28,7 +28,7 @@
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import SkipTest
from sklearn.utils.testing import raises
from sklearn.utils.testing import assert_raises


SCIKIT_LEARN_DATA = tempfile.mkdtemp(prefix="scikit_learn_lfw_test_")
@@ -110,10 +110,9 @@ def teardown_module():
shutil.rmtree(SCIKIT_LEARN_EMPTY_DATA)


@raises(IOError)
def test_load_empty_lfw_people():
fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA,
download_if_missing=False)
assert_raises(IOError, fetch_lfw_people, data_home=SCIKIT_LEARN_EMPTY_DATA,
download_if_missing=False)


def test_load_fake_lfw_people():
@@ -148,16 +147,15 @@ def test_load_fake_lfw_people():
'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez'])


@raises(ValueError)
def test_load_fake_lfw_people_too_restrictive():
fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100,
download_if_missing=False)
assert_raises(ValueError, fetch_lfw_people, data_home=SCIKIT_LEARN_DATA,
min_faces_per_person=100, download_if_missing=False)


@raises(IOError)
def test_load_empty_lfw_pairs():
fetch_lfw_pairs(data_home=SCIKIT_LEARN_EMPTY_DATA,
download_if_missing=False)
assert_raises(IOError, fetch_lfw_pairs,
data_home=SCIKIT_LEARN_EMPTY_DATA,
download_if_missing=False)


def test_load_fake_lfw_pairs():
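The test changes in this file (and in test_svmlight_format.py below) all follow
the same pattern: the ``@raises`` decorator is replaced by an explicit
``assert_raises`` call, presumably to reduce reliance on nose-style helpers. A
hypothetical sketch of the before/after shape, with a stand-in function that is
not part of scikit-learn:

from sklearn.utils.testing import assert_raises


def load_config(path):
    # Stand-in for the function under test; always fails.
    raise ValueError("cannot read %r" % (path,))


# Before: the whole test would be decorated with @raises(ValueError).
# After: the expected exception is asserted on the specific call.
def test_load_config_invalid():
    assert_raises(ValueError, load_config, "does-not-exist")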
20 changes: 7 additions & 13 deletions sklearn/datasets/tests/test_svmlight_format.py
@@ -15,7 +15,6 @@
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_raises_regex
from sklearn.utils.testing import raises
from sklearn.utils.testing import assert_in
from sklearn.utils.fixes import sp_version

@@ -138,20 +137,17 @@ def test_load_compressed():
assert_array_equal(y, ybz)


@raises(ValueError)
def test_load_invalid_file():
load_svmlight_file(invalidfile)
assert_raises(ValueError, load_svmlight_file, invalidfile)


@raises(ValueError)
def test_load_invalid_order_file():
load_svmlight_file(invalidfile2)
assert_raises(ValueError, load_svmlight_file, invalidfile2)


@raises(ValueError)
def test_load_zero_based():
f = BytesIO(b("-1 4:1.\n1 0:1\n"))
load_svmlight_file(f, zero_based=False)
assert_raises(ValueError, load_svmlight_file, f, zero_based=False)


def test_load_zero_based_auto():
@@ -186,21 +182,19 @@ def test_load_with_qid():
assert_array_equal(X.toarray(), [[.53, .12], [.13, .1], [.87, .12]])


@raises(ValueError)
def test_load_invalid_file2():
load_svmlight_files([datafile, invalidfile, datafile])
assert_raises(ValueError, load_svmlight_files,
[datafile, invalidfile, datafile])


@raises(TypeError)
def test_not_a_filename():
# in python 3 integers are valid file opening arguments (taken as unix
# file descriptors)
load_svmlight_file(.42)
assert_raises(TypeError, load_svmlight_file, .42)


@raises(IOError)
def test_invalid_filename():
load_svmlight_file("trou pic nic douille")
assert_raises(IOError, load_svmlight_file, "trou pic nic douille")


def test_dump():
12 changes: 9 additions & 3 deletions sklearn/decomposition/dict_learning.py
@@ -927,9 +927,9 @@ def fit(self, X, y=None):
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training vector, where n_samples in the number of samples
and n_features is the number of features.
X : Ignored.
y : Ignored.
Returns
-------
@@ -1081,6 +1081,8 @@ def fit(self, X, y=None):
Training vector, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
Returns
-------
self : object
@@ -1251,6 +1253,8 @@ def fit(self, X, y=None):
Training vector, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
Returns
-------
self : object
@@ -1284,6 +1288,8 @@ def partial_fit(self, X, y=None, iter_offset=None):
Training vector, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
iter_offset : integer, optional
The number of iterations on data batches that have been
performed before this call to partial_fit. This is optional:
4 changes: 4 additions & 0 deletions sklearn/decomposition/factor_analysis.py
@@ -149,6 +149,8 @@ def fit(self, X, y=None):
X : array-like, shape (n_samples, n_features)
Training data.
y : Ignored.
Returns
-------
self
@@ -338,6 +340,8 @@ def score(self, X, y=None):
X : array, shape (n_samples, n_features)
The data
y : Ignored.
Returns
-------
ll : float
4 changes: 4 additions & 0 deletions sklearn/decomposition/fastica_.py
@@ -509,6 +509,8 @@ def fit_transform(self, X, y=None):
Training data, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
Returns
-------
X_new : array-like, shape (n_samples, n_components)
@@ -524,6 +526,8 @@ def fit(self, X, y=None):
Training data, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored.
Returns
-------
self
4 changes: 3 additions & 1 deletion sklearn/decomposition/incremental_pca.py
@@ -158,7 +158,7 @@ def fit(self, X, y=None):
Training data, where n_samples is the number of samples and
n_features is the number of features.
y : Passthrough for ``Pipeline`` compatibility.
y : Ignored.
Returns
-------
@@ -199,6 +199,8 @@ def partial_fit(self, X, y=None, check_input=True):
check_input : bool
Run check_array on X.
y : Ignored.
Returns
-------
self : object
4 changes: 4 additions & 0 deletions sklearn/decomposition/nmf.py
@@ -1211,6 +1211,8 @@ def fit_transform(self, X, y=None, W=None, H=None):
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Data matrix to be decomposed
y : Ignored.
W : array-like, shape (n_samples, n_components)
If init='custom', it is used as initial guess for the solution.
@@ -1249,6 +1251,8 @@ def fit(self, X, y=None, **params):
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Data matrix to be decomposed
y : Ignored.
Returns
-------
self
6 changes: 6 additions & 0 deletions sklearn/decomposition/online_lda.py
@@ -473,6 +473,8 @@ def partial_fit(self, X, y=None):
X : array-like or sparse matrix, shape=(n_samples, n_features)
Document word matrix.
y : Ignored.
Returns
-------
self
@@ -515,6 +517,8 @@ def fit(self, X, y=None):
X : array-like or sparse matrix, shape=(n_samples, n_features)
Document word matrix.
y : Ignored.
Returns
-------
self
@@ -714,6 +718,8 @@ def score(self, X, y=None):
X : array-like or sparse matrix, shape=(n_samples, n_features)
Document word matrix.
y : Ignored.
Returns
-------
score : float
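Most of the remaining hunks add a ``y : Ignored.`` line to the docstrings of
transformer ``fit``, ``partial_fit`` and ``score`` methods. A hedged sketch of
why the unused argument stays in the signature, using synthetic data and an
arbitrary choice of estimators:

import numpy as np

from sklearn.decomposition import NMF
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

rng = np.random.RandomState(0)
X = np.abs(rng.randn(40, 8))      # NMF requires non-negative input
y = rng.randint(0, 2, size=40)

# Pipeline.fit forwards y to every step, so unsupervised transformers such as
# NMF accept y purely for signature compatibility and simply ignore it.
pipe = make_pipeline(NMF(n_components=3, random_state=0), LogisticRegression())
pipe.fit(X, y)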
