FIX bug with set_params on minkowski 'p'.
Fixes scikit-learn#2609.

There was a bug where set_params did not effectively change the value
of minkowski 'p': 'p' was copied into self.metric_kwds in
_init_params and never updated there afterwards.
Nikolay Mayorov authored and kashif committed Sep 12, 2014
1 parent 1261a36 commit 536085e
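For context, a minimal sketch (not part of the commit) of the failure this
fixes, using only names that appear in the diff below; before the fix, the
stale 'p' stored in self.metric_kwds at construction time won out over
set_params:

    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier

    rng = np.random.RandomState(0)
    X, y = rng.rand(20, 3), rng.randint(0, 2, 20)

    knn = KNeighborsClassifier(n_neighbors=3, metric='minkowski', p=2)
    knn.set_params(p=1)  # previously ignored: metric_kwds kept the old p=2
    knn.fit(X, y)
    print(knn.effective_metric_)  # 'manhattan' after the fix ('euclidean' before)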
Showing 5 changed files with 78 additions and 44 deletions.
52 changes: 36 additions & 16 deletions sklearn/neighbors/base.py
@@ -98,13 +98,22 @@ def __init__(self):

def _init_params(self, n_neighbors=None, radius=None,
algorithm='auto', leaf_size=30, metric='minkowski',
p=2, **kwargs):
p=2, metric_params=None, **kwargs):
if kwargs:
warnings.warn("Passing additional arguments to the metric "
"function as **kwargs is deprecated. "
"Use metric_params dict instead.",
DeprecationWarning, stacklevel=3)
if metric_params is None:
metric_params = {}
metric_params.update(kwargs)

self.n_neighbors = n_neighbors
self.radius = radius
self.algorithm = algorithm
self.leaf_size = leaf_size
self.metric = metric
self.metric_kwds = kwargs
self.metric_params = metric_params
self.p = p

if algorithm not in ['auto', 'brute',
@@ -126,24 +135,35 @@ def _init_params(self, n_neighbors=None, radius=None,
raise ValueError("Metric '%s' not valid for algorithm '%s'"
% (metric, algorithm))

if self.metric in ['wminkowski', 'minkowski']:
self.metric_kwds['p'] = p
if p < 1:
raise ValueError("p must be greater than one "
"for minkowski metric")
if self.metric_params is not None and 'p' in self.metric_params:
warnings.warn("Parameter p is found in metric_params. "
"The corresponding parameter from __init__ "
"is ignored.", SyntaxWarning, stacklevel=3)
effective_p = metric_params['p']
else:
effective_p = self.p

if self.metric in ['wminkowski', 'minkowski'] and effective_p < 1:
raise ValueError("p must be greater than one for minkowski metric")

self._fit_X = None
self._tree = None
self._fit_method = None

def _fit(self, X):
self.effective_metric_ = self.metric
self.effective_metric_kwds_ = self.metric_kwds
if self.metric_params is None:
self.effective_metric_params_ = {}
else:
self.effective_metric_params_ = self.metric_params.copy()

effective_p = self.effective_metric_params_.get('p', self.p)
if self.metric in ['wminkowski', 'minkowski']:
self.effective_metric_params_['p'] = effective_p

self.effective_metric_ = self.metric
# For minkowski distance, use more efficient methods where available
if self.metric == 'minkowski':
self.effective_metric_kwds_ = self.metric_kwds.copy()
p = self.effective_metric_kwds_.pop('p', 2)
p = self.effective_metric_params_.pop('p', 2)
if p < 1:
raise ValueError("p must be greater than one "
"for minkowski metric")
@@ -154,7 +174,7 @@ def _fit(self, X):
elif p == np.inf:
self.effective_metric_ = 'chebyshev'
else:
self.effective_metric_kwds_['p'] = p
self.effective_metric_params_['p'] = p

if isinstance(X, NeighborsBase):
self._fit_X = X._fit_X
@@ -210,11 +230,11 @@ def _fit(self, X):
if self._fit_method == 'ball_tree':
self._tree = BallTree(X, self.leaf_size,
metric=self.effective_metric_,
**self.effective_metric_kwds_)
**self.effective_metric_params_)
elif self._fit_method == 'kd_tree':
self._tree = KDTree(X, self.leaf_size,
metric=self.effective_metric_,
**self.effective_metric_kwds_)
**self.effective_metric_params_)
elif self._fit_method == 'brute':
self._tree = None
else:
@@ -292,7 +312,7 @@ class from an array representing our data set and ask who's
else:
dist = pairwise_distances(X, self._fit_X,
self.effective_metric_,
**self.effective_metric_kwds_)
**self.effective_metric_params_)

neigh_ind = argpartition(dist, n_neighbors - 1, axis=1)
neigh_ind = neigh_ind[:, :n_neighbors]
@@ -456,7 +476,7 @@ class from an array representing our data set and ask who's
else:
dist = pairwise_distances(X, self._fit_X,
self.effective_metric_,
**self.effective_metric_kwds_)
**self.effective_metric_params_)
neigh_ind = [np.where(d < radius)[0] for d in dist]

# if there are the same number of neighbors for each point,
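As a usage note (not part of the commit), the hunks above resolve 'p' in a
fixed order: an entry in metric_params wins over the p argument (with a
SyntaxWarning), and special values are rewritten to faster named metrics at
fit time. A sketch assuming the post-commit API:

    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    X = np.random.RandomState(0).rand(10, 2)

    # 'p' supplied twice: the metric_params entry wins, with a SyntaxWarning.
    nn = NearestNeighbors(metric='minkowski', p=2, metric_params={'p': 3}).fit(X)
    print(nn.effective_metric_params_)  # {'p': 3}

    # Special p values never reach the generic minkowski code path.
    for p, name in [(1, 'manhattan'), (2, 'euclidean'), (np.inf, 'chebyshev')]:
        nn = NearestNeighbors(metric='minkowski', p=p).fit(X)
        print(nn.effective_metric_ == name)  # True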
23 changes: 12 additions & 11 deletions sklearn/neighbors/classification.py
@@ -61,7 +61,7 @@ class KNeighborsClassifier(NeighborsBase, KNeighborsMixin,
required to store the tree. The optimal value depends on the
nature of the problem.
metric : string or DistanceMetric object (default='minkowski')
metric : string or DistanceMetric object (default = 'minkowski')
the distance metric to use for the tree. The default metric is
minkowski, and with p=2 is equivalent to the standard Euclidean
metric. See the documentation of the DistanceMetric class for a
@@ -72,9 +72,8 @@ class KNeighborsClassifier(NeighborsBase, KNeighborsMixin,
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
**kwargs :
additional keyword arguments are passed to the distance function as
additional arguments.
metric_params: dict, optional (default = None)
additional keyword arguments for the metric function.
Examples
--------
@@ -113,10 +112,12 @@ class KNeighborsClassifier(NeighborsBase, KNeighborsMixin,

def __init__(self, n_neighbors=5,
weights='uniform', algorithm='auto', leaf_size=30,
p=2, metric='minkowski', **kwargs):
p=2, metric='minkowski', metric_params=None, **kwargs):

self._init_params(n_neighbors=n_neighbors,
algorithm=algorithm,
leaf_size=leaf_size, metric=metric, p=p, **kwargs)
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params, **kwargs)
self.weights = _check_weights(weights)

def predict(self, X):
@@ -277,9 +278,8 @@ class RadiusNeighborsClassifier(NeighborsBase, RadiusNeighborsMixin,
neighbors on given radius).
If set to None, ValueError is raised, when outlier is detected.
**kwargs :
additional keyword arguments are passed to the distance function as
additional arguments.
metric_params: dict, optional (default = None)
additional keyword arguments for the metric function.
Examples
--------
@@ -309,11 +309,12 @@ class RadiusNeighborsClassifier(NeighborsBase, RadiusNeighborsMixin,

def __init__(self, radius=1.0, weights='uniform',
algorithm='auto', leaf_size=30, p=2, metric='minkowski',
outlier_label=None, **kwargs):
outlier_label=None, metric_params=None, **kwargs):
self._init_params(radius=radius,
algorithm=algorithm,
leaf_size=leaf_size,
metric=metric, p=p, **kwargs)
metric=metric, p=p, metric_params=metric_params,
**kwargs)
self.weights = _check_weights(weights)
self.outlier_label = outlier_label

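A usage sketch for the new parameter (hypothetical example, assuming the
post-commit signatures above): extra metric arguments now travel in an
explicit dict rather than through **kwargs.

    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier

    rng = np.random.RandomState(0)
    X, y = rng.rand(30, 5), rng.randint(0, 2, 30)

    # Deprecated style: weights smuggled in via **kwargs (DeprecationWarning).
    # knn = KNeighborsClassifier(metric='wminkowski', p=2, w=np.ones(5))

    # New style: the same weights passed explicitly.
    knn = KNeighborsClassifier(n_neighbors=3, metric='wminkowski', p=2,
                               metric_params={'w': np.ones(5)}).fit(X, y)
    print(knn.predict(X[:2]))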
20 changes: 10 additions & 10 deletions sklearn/neighbors/regression.py
@@ -72,9 +72,8 @@ class KNeighborsRegressor(NeighborsBase, KNeighborsMixin,
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
**kwargs :
additional keyword arguments are passed to the distance function as
additional arguments.
metric_params: dict, optional (default = None)
additional keyword arguments for the metric function.
Examples
--------
@@ -111,10 +110,11 @@ class KNeighborsRegressor(NeighborsBase, KNeighborsMixin,

def __init__(self, n_neighbors=5, weights='uniform',
algorithm='auto', leaf_size=30,
p=2, metric='minkowski', **kwargs):
p=2, metric='minkowski', metric_params=None, **kwargs):
self._init_params(n_neighbors=n_neighbors,
algorithm=algorithm,
leaf_size=leaf_size, metric=metric, p=p, **kwargs)
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params, **kwargs)
self.weights = _check_weights(weights)

def predict(self, X):
@@ -213,9 +213,8 @@ class RadiusNeighborsRegressor(NeighborsBase, RadiusNeighborsMixin,
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
**kwargs :
additional keyword arguments are passed to the distance function as
additional arguments.
metric_params: dict, optional (default = None)
additional keyword arguments for the metric function.
Examples
--------
@@ -245,11 +244,12 @@ class RadiusNeighborsRegressor(NeighborsBase, RadiusNeighborsMixin,

def __init__(self, radius=1.0, weights='uniform',
algorithm='auto', leaf_size=30,
p=2, metric='minkowski', **kwargs):
p=2, metric='minkowski', metric_params=None, **kwargs):
self._init_params(radius=radius,
algorithm=algorithm,
leaf_size=leaf_size,
p=p, metric=metric, **kwargs)
p=p, metric=metric, metric_params=metric_params,
**kwargs)
self.weights = _check_weights(weights)

def predict(self, X):
18 changes: 13 additions & 5 deletions sklearn/neighbors/tests/test_neighbors.py
@@ -10,6 +10,7 @@
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_true
from sklearn.utils.testing import assert_warns
from sklearn.utils.testing import ignore_warnings
from sklearn.utils.validation import check_random_state
from sklearn import neighbors, datasets
@@ -209,7 +210,6 @@ def test_kneighbors_classifier_predict_proba():
assert_array_almost_equal(real_prob, y_prob)



def test_radius_neighbors_classifier(n_samples=40,
n_features=5,
n_test_pts=10,
@@ -810,22 +810,23 @@ def test_neighbors_metrics(n_samples=20, n_features=3,

test = rng.rand(n_query_pts, n_features)

for metric, kwds in metrics:
for metric, metric_params in metrics:
results = []

p = metric_params.pop('p', 2)
for algorithm in algorithms:
# KD tree doesn't support all metrics
if (algorithm == 'kd_tree' and
metric not in neighbors.KDTree.valid_metrics):
assert_raises(ValueError,
neighbors.NearestNeighbors,
algorithm=algorithm,
metric=metric, **kwds)
metric=metric, metric_params=metric_params)
continue

neigh = neighbors.NearestNeighbors(n_neighbors=n_neighbors,
algorithm=algorithm,
metric=metric, **kwds)
metric=metric, p=p,
metric_params=metric_params)
neigh.fit(X)
results.append(neigh.kneighbors(test, return_distance=True))

@@ -849,6 +850,13 @@ def test_callable_metric():
assert_array_almost_equal(dist1, dist2)


def test_metric_params_interface():
assert_warns(DeprecationWarning, neighbors.KNeighborsClassifier,
metric='wminkowski', w=np.ones(10))
assert_warns(SyntaxWarning, neighbors.KNeighborsClassifier,
metric_params={'p': 3})


if __name__ == '__main__':
import nose
nose.runmodule()
9 changes: 7 additions & 2 deletions sklearn/neighbors/unsupervised.py
@@ -43,6 +43,9 @@ class NearestNeighbors(NeighborsBase, KNeighborsMixin,
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params: dict, optional (default = None)
additional keyword arguments for the metric function.
Examples
--------
>>> from sklearn.neighbors import NearestNeighbors
@@ -76,8 +79,10 @@ class NearestNeighbors(NeighborsBase, KNeighborsMixin,
"""

def __init__(self, n_neighbors=5, radius=1.0,
algorithm='auto', leaf_size=30, metric='minkowski', **kwargs):
algorithm='auto', leaf_size=30, metric='minkowski',
p=2, metric_params=None, **kwargs):
self._init_params(n_neighbors=n_neighbors,
radius=radius,
algorithm=algorithm,
leaf_size=leaf_size, metric=metric, **kwargs)
leaf_size=leaf_size, metric=metric, p=p,
metric_params=metric_params, **kwargs)
