From c6bafed3abed14035dd5d2a0c14bd78d9aa7d996 Mon Sep 17 00:00:00 2001
From: Martin Ledoux
Date: Thu, 19 Jul 2012 15:20:06 -0400
Subject: [PATCH 1/8] ENH: use Bayesian priors in Nearest Neighbors classifier (Issue 399)

---
 sklearn/neighbors/base.py           |  5 ++-
 sklearn/neighbors/classification.py | 68 ++++++++++++++++++++---------
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py
index 357cd3eadfa09..68cafd574bd3c 100644
--- a/sklearn/neighbors/base.py
+++ b/sklearn/neighbors/base.py
@@ -567,8 +567,11 @@ def fit(self, X, y):
         y : {array-like, sparse matrix}, shape = [n_samples]
             Target values, array of integer values.
         """
-        self._y = np.asarray(y)
+        self._y = np.asarray(y, dtype=np.int)
         self._classes = np.sort(np.unique(y))
+        self.class_prior_ = np.zeros(self._classes.size)
+        for i, y_i in enumerate(self._classes):
+            self.class_prior_[i] = np.float(np.sum(y == y_i)) / len(y)
 
         return self._fit(X)
 
diff --git a/sklearn/neighbors/classification.py b/sklearn/neighbors/classification.py
index 4f2ada47460e4..ced3fafc74129 100644
--- a/sklearn/neighbors/classification.py
+++ b/sklearn/neighbors/classification.py
@@ -126,19 +126,8 @@ def predict(self, X):
         labels: array
             List of class labels (one for each data sample).
         """
-        X = atleast2d_or_csr(X)
-
-        neigh_dist, neigh_ind = self.kneighbors(X)
-        pred_labels = self._y[neigh_ind]
-
-        weights = _get_weights(neigh_dist, self.weights)
-
-        if weights is None:
-            mode, _ = stats.mode(pred_labels, axis=1)
-        else:
-            mode, _ = weighted_mode(pred_labels, weights, axis=1)
-
-        return mode.flatten().astype(np.int)
+        probabilities = self.predict_proba(X)
+        return self._classes[probabilities.argmax(axis=1)].astype(np.int)
 
     def predict_proba(self, X):
         """Return probability estimates for the test data X.
@@ -179,6 +168,13 @@ def predict_proba(self, X):
         for i, idx in enumerate(pred_indices.T):  # loop is O(n_neighbors)
             probabilities[all_rows, idx] += weights[:, i]
 
+        # Compute the unnormalized posterior probability, taking
+        # self.class_prior_ into consideration.
+        class_count = np.zeros(self._classes.size)
+        for k, c in enumerate(self._classes):
+            class_count[k] = np.sum(self._y == c)
+        probabilities = ((probabilities / class_count) * self.class_prior_)
+
         # normalize 'votes' into real [0,1] probabilities
         probabilities = (probabilities.T / probabilities.sum(axis=1)).T
 
@@ -310,13 +306,43 @@ def predict(self, X):
                                  'dataset')
 
         weights = _get_weights(neigh_dist, self.weights)
-        if weights is None:
-            mode = np.asarray([stats.mode(pl)[0] for pl in pred_labels],
-                              dtype=np.int)
-        else:
-            mode = np.asarray([weighted_mode(pl, w)[0]
-                               for (pl, w) in zip(pred_labels, weights)],
-                              dtype=np.int)
+        # `neigh_dist` is an array of objects, where each
+        # object is a 1D array of distances.
+        weights = np.array([np.ones(len(row)) for row in neigh_dist])
+
+        probabilities = np.zeros((X.shape[0], self._classes.size))
+
+        # We cannot vectorize the following because of the way Python handles
+        # M += 1: if a predicted index was to occur more than once (for a
+        # given tested point), the corresponding element in `probabilities`
+        # would still be incremented only once.
+        outliers = []  # row indices of the outliers (if any)
+        for row in range(len(pred_labels)):
+            pred_indices = pred_labels[row].copy()
+            if self.outlier_label and pred_indices == outlier_label:
+                # We'll impose the label for that row later.
+ outliers.append(row) + continue + for k, c in enumerate(self._classes): + pred_indices[pred_labels[row] == c] = k + for i, idx in enumerate(pred_indices): + probabilities[row, idx] += weights[row][i] + + # Compute the unnormalized posterior probability, taking + # self.class_prior_ into consideration. + class_count = np.zeros(self._classes.size) + for k, c in enumerate(self._classes): + class_count[k] = np.sum(self._y == c) + probabilities = (probabilities / class_count) * self.class_prior_ + + # normalize 'votes' into real [0,1] probabilities + probabilities = (probabilities.T / probabilities.sum(axis=1)).T + + # Predict the class of each row, based on the maximum posterior + # probability. If needed, correct the predictions for outliers. + preds = self._classes[probabilities.argmax(axis=1)].astype(np.int) + if self.outlier_label: + preds[outliers] = self.outlier_label - return mode.flatten().astype(np.int) + return preds \ No newline at end of file From 3005a60432334da16accc12361c16b93121bbeb0 Mon Sep 17 00:00:00 2001 From: Martin Ledoux Date: Fri, 20 Jul 2012 10:30:21 -0400 Subject: [PATCH 2/8] use `unique` to get integers from labels --- sklearn/neighbors/base.py | 4 +-- sklearn/neighbors/classification.py | 38 +++++++++++------------------ 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 68cafd574bd3c..fd204571c9e86 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -15,6 +15,7 @@ from ..base import BaseEstimator from ..metrics import pairwise_distances from ..utils import safe_asarray, atleast2d_or_csr +from ..utils.fixes import unique class NeighborsWarning(UserWarning): @@ -567,8 +568,7 @@ def fit(self, X, y): y : {array-like, sparse matrix}, shape = [n_samples] Target values, array of integer values. """ - self._y = np.asarray(y, dtype=np.int) - self._classes = np.sort(np.unique(y)) + self._classes, self._y = unique(y, return_inverse=True) self.class_prior_ = np.zeros(self._classes.size) for i, y_i in enumerate(self._classes): self.class_prior_[i] = np.float(np.sum(y == y_i)) / len(y) diff --git a/sklearn/neighbors/classification.py b/sklearn/neighbors/classification.py index ced3fafc74129..c41c85a30166d 100644 --- a/sklearn/neighbors/classification.py +++ b/sklearn/neighbors/classification.py @@ -146,22 +146,15 @@ def predict_proba(self, X): X = atleast2d_or_csr(X) neigh_dist, neigh_ind = self.kneighbors(X) - pred_labels = self._y[neigh_ind] + pred_indices = self._y[neigh_ind] weights = _get_weights(neigh_dist, self.weights) if weights is None: - weights = np.ones_like(pred_labels) + weights = np.ones_like(pred_indices) probabilities = np.zeros((X.shape[0], self._classes.size)) - # Translate class label to a column index in probabilities array. - # This may not be needed provided classes labels are guaranteed to be - # np.arange(n_classes) (e.g. consecutive and starting with 0) - pred_indices = pred_labels.copy() - for k, c in enumerate(self._classes): - pred_indices[pred_labels == c] = k - # a simple ':' index doesn't work right all_rows = np.arange(X.shape[0]) @@ -171,8 +164,8 @@ def predict_proba(self, X): # Compute the unnormalized posterior probability, taking # self.class_prior_ into consideration. 
class_count = np.zeros(self._classes.size) - for k, c in enumerate(self._classes): - class_count[k] = np.sum(self._y == c) + for k in range(self._classes.size): + class_count[k] = np.sum(self._y == k) probabilities = ((probabilities / class_count) * self.class_prior_) # normalize 'votes' into real [0,1] probabilities @@ -287,6 +280,7 @@ def predict(self, X): neigh_dist, neigh_ind = self.radius_neighbors(X) pred_labels = [self._y[ind] for ind in neigh_ind] + outliers = [] # row indices of the outliers (if any) if self.outlier_label: outlier_label = np.array((self.outlier_label, )) small_value = np.array((1e-6, )) @@ -295,6 +289,8 @@ def predict(self, X): if len(pl) < 1: pred_labels[i] = outlier_label neigh_dist[i] = small_value + # We'll impose the label for that row later. + outliers.append(i) else: for pl in pred_labels: # Check that all have at least 1 neighbor @@ -317,23 +313,17 @@ def predict(self, X): # M += 1: if a predicted index was to occur more than once (for a # given tested point), the corresponding element in `probabilities` # would still be incremented only once. - outliers = [] # row indices of the outliers (if any) - for row in range(len(pred_labels)): - pred_indices = pred_labels[row].copy() - if self.outlier_label and pred_indices == outlier_label: - # We'll impose the label for that row later. - outliers.append(row) - continue - for k, c in enumerate(self._classes): - pred_indices[pred_labels[row] == c] = k - for i, idx in enumerate(pred_indices): - probabilities[row, idx] += weights[row][i] + for i, pi in enumerate(pred_labels): + if len(pi) < 1: + continue # outlier + for j, idx in enumerate(pi): + probabilities[i, idx] += weights[i][j] # Compute the unnormalized posterior probability, taking # self.class_prior_ into consideration. class_count = np.zeros(self._classes.size) - for k, c in enumerate(self._classes): - class_count[k] = np.sum(self._y == c) + for k in range(self._classes.size): + class_count[k] = np.sum(self._y == k) probabilities = (probabilities / class_count) * self.class_prior_ # normalize 'votes' into real [0,1] probabilities From 884e7e29a4540ca51319fbf88700920365129706 Mon Sep 17 00:00:00 2001 From: Martin Ledoux Date: Fri, 20 Jul 2012 14:52:37 -0400 Subject: [PATCH 3/8] replace 3 loops by calls to `np.bincount` --- sklearn/neighbors/classification.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/sklearn/neighbors/classification.py b/sklearn/neighbors/classification.py index c41c85a30166d..dad2d07e6f96f 100644 --- a/sklearn/neighbors/classification.py +++ b/sklearn/neighbors/classification.py @@ -163,10 +163,8 @@ def predict_proba(self, X): # Compute the unnormalized posterior probability, taking # self.class_prior_ into consideration. 
-        class_count = np.zeros(self._classes.size)
-        for k in range(self._classes.size):
-            class_count[k] = np.sum(self._y == k)
-        probabilities = ((probabilities / class_count) * self.class_prior_)
+        class_count = np.bincount(self._y)
+        probabilities = (probabilities / class_count) * self.class_prior_
 
         # normalize 'votes' into real [0,1] probabilities
         probabilities = (probabilities.T / probabilities.sum(axis=1)).T
@@ -282,13 +280,9 @@ def predict(self, X):
 
         outliers = []  # row indices of the outliers (if any)
         if self.outlier_label:
-            outlier_label = np.array((self.outlier_label, ))
-            small_value = np.array((1e-6, ))
             for i, pl in enumerate(pred_labels):
                 # Check that all have at least 1 neighbor
                 if len(pl) < 1:
                     # We'll impose the label for that row later.
                     outliers.append(i)
         else:
@@ -316,14 +310,16 @@ def predict(self, X):
         for i, pi in enumerate(pred_labels):
             if len(pi) < 1:
                 continue  # outlier
-            for j, idx in enumerate(pi):
-                probabilities[i, idx] += weights[i][j]
+            # When we support NumPy >= 1.6, we'll be able to simply use:
+            # np.bincount(pi, weights[i], minlength=self._classes.size)
+            unpadded_probs = np.bincount(pi, weights[i])
+            probabilities[i] = np.append(unpadded_probs,
+                                         np.zeros(self._classes.size -
+                                                  unpadded_probs.shape[0]))
 
         # Compute the unnormalized posterior probability, taking
         # self.class_prior_ into consideration.
-        class_count = np.zeros(self._classes.size)
-        for k in range(self._classes.size):
-            class_count[k] = np.sum(self._y == k)
+        class_count = np.bincount(self._y)
         probabilities = (probabilities / class_count) * self.class_prior_
 
         # normalize 'votes' into real [0,1] probabilities

From 902dcb6c972d50671764f68952ed4702adfbf23d Mon Sep 17 00:00:00 2001
From: Martin Ledoux
Date: Tue, 24 Jul 2012 10:51:48 -0400
Subject: [PATCH 4/8] replace loop by call to `np.bincount`

---
 sklearn/neighbors/base.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py
index fd204571c9e86..a277731cc774b 100644
--- a/sklearn/neighbors/base.py
+++ b/sklearn/neighbors/base.py
@@ -569,9 +569,7 @@ def fit(self, X, y):
             Target values, array of integer values.
""" self._classes, self._y = unique(y, return_inverse=True) - self.class_prior_ = np.zeros(self._classes.size) - for i, y_i in enumerate(self._classes): - self.class_prior_[i] = np.float(np.sum(y == y_i)) / len(y) + self.class_prior_ = np.bincount(self._y).astype(float) / len(self._y) return self._fit(X) From 3680fc55d35d199d4f0452f4204a70bfc7e15d74 Mon Sep 17 00:00:00 2001 From: Martin Ledoux Date: Tue, 24 Jul 2012 13:08:51 -0400 Subject: [PATCH 5/8] parameter `class_prior` is processed by constructor --- sklearn/neighbors/base.py | 34 ++++++++++++++++- sklearn/neighbors/classification.py | 57 +++++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index a277731cc774b..e274f5f680397 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -69,6 +69,39 @@ def _get_weights(dist, weights): raise ValueError("weights not recognized: should be 'uniform', " "'distance', or a callable function") +def _check_class_prior(class_prior): + """Check to make sure class prior is valid.""" + if class_prior in (None, 'default', 'flat'): + return class_prior + elif isinstance(class_prior, (list, np.ndarray)): + return class_prior + else: + raise ValueError("class prior not recognized: should be 'default', " + "'flat', or a list or ndarray") + +def _get_class_prior(y, class_prior): + """Get class prior from targets ``y`` and parameter ``class_prior`` + + Parameters + ========== + y : ndarray + The target labels, from 0 to ``n-1`` (thus ``n`` classes) + class_prior: {'default', 'flat' or a dict} + The class prior probabilities to use + + Returns + ======= + class_prior_arr: array of the same shape as ``np.unique(y)`` + """ + if class_prior in (None, 'default'): + return np.bincount(y).astype(float) / len(y) + elif class_prior == 'flat': + return np.ones((len(np.unique(y)),)) / len(np.unique(y)) + elif isinstance(class_prior, (list, np.ndarray)): + return class_prior + else: + raise ValueError("class prior not recognized: should be 'default', " + "'flat', or a list or ndarray") class NeighborsBase(BaseEstimator): """Base class for nearest neighbors estimators.""" @@ -569,7 +602,6 @@ def fit(self, X, y): Target values, array of integer values. """ self._classes, self._y = unique(y, return_inverse=True) - self.class_prior_ = np.bincount(self._y).astype(float) / len(self._y) return self._fit(X) diff --git a/sklearn/neighbors/classification.py b/sklearn/neighbors/classification.py index dad2d07e6f96f..549240c5a12f7 100644 --- a/sklearn/neighbors/classification.py +++ b/sklearn/neighbors/classification.py @@ -13,6 +13,7 @@ from .base import \ _check_weights, _get_weights, \ + _check_class_prior, _get_class_prior, \ NeighborsBase, KNeighborsMixin,\ RadiusNeighborsMixin, SupervisedIntegerMixin from ..base import ClassifierMixin @@ -42,6 +43,18 @@ class KNeighborsClassifier(NeighborsBase, KNeighborsMixin, Uniform weights are used by default. + class_prior : str, list or ndarray, optional (default = 'default') + class prior probabilities used in prediction. Possible values: + + - 'default': default prior probabilities. For each class, its + prior probability is the proportion of points in the dataset + that are in this class. + - 'flat': equiprobable prior probabilites. If there are C classes, + then the prior probability for every class is 1/C. + - [list or ndarray]: a used-defined list or ndarray, listing + the prior class probability for each class, in increasing order + of class label. 
+
     algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
         Algorithm used to compute the nearest neighbors:
 
@@ -86,6 +99,11 @@ class KNeighborsClassifier(NeighborsBase, KNeighborsMixin,
     [0]
     >>> print(neigh.predict_proba([[0.9]]))
     [[ 0.66666667  0.33333333]]
+    >>> neigh = KNeighborsClassifier(n_neighbors=3, class_prior=[0.75, 0.25])
+    >>> neigh.fit(X, y)  # doctest: +ELLIPSIS
+    KNeighborsClassifier(...)
+    >>> print(neigh.predict_proba([[2.0]]))
+    [[ 0.6  0.4]]
 
     See also
     --------
@@ -100,10 +118,16 @@ class KNeighborsClassifier(NeighborsBase, KNeighborsMixin,
     for a discussion of the choice of ``algorithm`` and ``leaf_size``.
 
     http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm
+
+    References
+    ----------
+    Bishop, Christopher M. *Pattern Recognition and Machine Learning*.
+    New York: Springer, 2006, p. 124-7.
     """
 
     def __init__(self, n_neighbors=5,
                  weights='uniform',
+                 class_prior='default',
                  algorithm='auto', leaf_size=30,
                  warn_on_equidistant=True, p=2):
         self._init_params(n_neighbors=n_neighbors,
@@ -112,6 +136,7 @@ def __init__(self, n_neighbors=5,
                           warn_on_equidistant=warn_on_equidistant,
                           p=p)
         self.weights = _check_weights(weights)
+        self.class_prior = _check_class_prior(class_prior)
 
     def predict(self, X):
         """Predict the class labels for the provided data
@@ -164,11 +189,11 @@ def predict_proba(self, X):
         # Compute the unnormalized posterior probability, taking
         # self.class_prior_ into consideration.
         class_count = np.bincount(self._y)
-        probabilities = (probabilities / class_count) * self.class_prior_
+        class_prior = _get_class_prior(self._y, self.class_prior)
+        probabilities = (probabilities / class_count) * class_prior
 
         # normalize 'votes' into real [0,1] probabilities
         probabilities = (probabilities.T / probabilities.sum(axis=1)).T
-
         return probabilities
 
 
@@ -196,6 +221,18 @@ class RadiusNeighborsClassifier(NeighborsBase, RadiusNeighborsMixin,
 
         Uniform weights are used by default.
 
+    class_prior : str, list or ndarray, optional (default = 'default')
+        class prior probabilities used in prediction. Possible values:
+
+        - 'default': default prior probabilities. For each class, its
+          prior probability is the proportion of points in the dataset
+          that are in this class.
+        - 'flat': equiprobable prior probabilities. If there are C classes,
+          then the prior probability for every class is 1/C.
+        - [list or ndarray]: a user-defined list or ndarray, listing
+          the prior class probability for each class, in increasing order
+          of class label.
+
     algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
         Algorithm used to compute the nearest neighbors:
 
@@ -235,6 +272,11 @@ class RadiusNeighborsClassifier(NeighborsBase, RadiusNeighborsMixin,
     RadiusNeighborsClassifier(...)
     >>> print(neigh.predict([[1.5]]))
     [0]
+    >>> neigh = RadiusNeighborsClassifier(radius=1.0, class_prior=[0.2, 0.8])
+    >>> neigh.fit(X, y)  # doctest: +ELLIPSIS
+    RadiusNeighborsClassifier(...)
+    >>> print(neigh.predict([[1.5]]))
+    [1]
 
     See also
     --------
@@ -249,15 +291,21 @@ class RadiusNeighborsClassifier(NeighborsBase, RadiusNeighborsMixin,
     for a discussion of the choice of ``algorithm`` and ``leaf_size``.
 
     http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm
+
+    References
+    ----------
+    Bishop, Christopher M. *Pattern Recognition and Machine Learning*.
+    New York: Springer, 2006, p. 124-7.
""" - def __init__(self, radius=1.0, weights='uniform', + def __init__(self, radius=1.0, weights='uniform', class_prior=None, algorithm='auto', leaf_size=30, p=2, outlier_label=None): self._init_params(radius=radius, algorithm=algorithm, leaf_size=leaf_size, p=p) self.weights = _check_weights(weights) + self.class_prior = _check_class_prior(class_prior) self.outlier_label = outlier_label def predict(self, X): @@ -320,7 +368,8 @@ def predict(self, X): # Compute the unnormalized posterior probability, taking # self.class_prior_ into consideration. class_count = np.bincount(self._y) - probabilities = (probabilities / class_count) * self.class_prior_ + class_prior = _get_class_prior(self._y, self.class_prior) + probabilities = (probabilities / class_count) * class_prior # normalize 'votes' into real [0,1] probabilities probabilities = (probabilities.T / probabilities.sum(axis=1)).T From e83c711866954cf1656fb0c4f38d3fc1bbda6710 Mon Sep 17 00:00:00 2001 From: Martin Ledoux Date: Tue, 24 Jul 2012 14:32:16 -0400 Subject: [PATCH 6/8] implement `predict` in terms of `predict_proba` --- sklearn/neighbors/classification.py | 56 ++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/sklearn/neighbors/classification.py b/sklearn/neighbors/classification.py index 549240c5a12f7..3b2c7bf46268b 100644 --- a/sklearn/neighbors/classification.py +++ b/sklearn/neighbors/classification.py @@ -272,6 +272,8 @@ class prior probabilities used in prediction. Possible values: RadiusNeighborsClassifier(...) >>> print(neigh.predict([[1.5]])) [0] + >>> print(neigh.predict_proba([[1.0]])) + [[ 0.66666667 0.33333333]] >>> neigh = RadiusNeighborsClassifier(radius=1.0, class_prior=[0.2, 0.8]) >>> neigh.fit(X, y) # doctest: +ELLIPSIS RadiusNeighborsClassifier(...) @@ -321,13 +323,47 @@ def predict(self, X): labels: array List of class labels (one for each data sample). """ + if self.outlier_label != None: + probabilities, outliers = self.predict_proba(X) + else: + probabilities = self.predict_proba(X) + # Predict the class of each row, based on the maximum posterior + # probability. If needed, correct the predictions for outliers. + preds = self._classes[probabilities.argmax(axis=1)].astype(np.int) + if self.outlier_label != None: + preds[outliers] = self.outlier_label + + return preds + + def predict_proba(self, X): + """Return probability estimates for the test data X. + + Parameters + ---------- + X: array, shape = (n_samples, n_features) + A 2-D array representing the test points. + + Returns + ------- + probabilities : array, shape = [n_samples, n_classes] + Probabilities of the samples for each class in the model, + where classes are ordered arithmetically. If an outlier label + has been provided and is part of the actual classes, then + outliers will be assigned to that label with probability 1; if + the outlier label (e.g. -1) is not part of the actual classes, + then outliers will have probability 0 for every actual class. + outliers : list, length = n_samples + List of row indices in X that are outliers. Returned only if + self.outlier_label is not set to None. 
+ """ X = atleast2d_or_csr(X) neigh_dist, neigh_ind = self.radius_neighbors(X) pred_labels = [self._y[ind] for ind in neigh_ind] outliers = [] # row indices of the outliers (if any) - if self.outlier_label: + # Test with None, since outlier_label could legitimately be 0 + if self.outlier_label != None: for i, pl in enumerate(pred_labels): # Check that all have at least 1 neighbor if len(pl) < 1: @@ -357,6 +393,7 @@ def predict(self, X): # would still be incremented only once. for i, pi in enumerate(pred_labels): if len(pi) < 1: + probabilities[i] = 1e-6 # prevent division by zero later continue # outlier # When we support NumPy >= 1.6, we'll be able to simply use: # np.bincount(pi, weights, minlength=self._classes.size) @@ -373,11 +410,12 @@ def predict(self, X): # normalize 'votes' into real [0,1] probabilities probabilities = (probabilities.T / probabilities.sum(axis=1)).T - - # Predict the class of each row, based on the maximum posterior - # probability. If needed, correct the predictions for outliers. - preds = self._classes[probabilities.argmax(axis=1)].astype(np.int) - if self.outlier_label: - preds[outliers] = self.outlier_label - - return preds \ No newline at end of file + if self.outlier_label != None: + probabilities[outliers] = 0. + outlier_indices = np.nonzero(self._classes == + self.outlier_label)[0] + if outlier_indices.size > 0: + probabilities[outliers, outlier_indices[0]] = 1 + return probabilities, outliers + else: + return probabilities \ No newline at end of file From 089dece4062fb07c499bdeffff681f0c8b1db28d Mon Sep 17 00:00:00 2001 From: Martin Ledoux Date: Wed, 25 Jul 2012 09:39:59 -0400 Subject: [PATCH 7/8] integrate `class_prior` parameter to narrative doc --- doc/modules/neighbors.rst | 23 ++++++++++++++++--- .../supervised_learning.rst | 4 ++-- examples/neighbors/plot_classification.py | 10 ++++---- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index b19f6c9063e67..0d85725aab83a 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -59,7 +59,7 @@ Nearest Neighbors Classification Neighbors-based classification is a type of *instance-based learning* or *non-generalizing learning*: it does not attempt to construct a general internal model, but simply stores instances of the training data. -Classification is computed from a simple majority vote of the nearest +The basic classification is computed from a simple majority vote of the nearest neighbors of each point: a query point is assigned the data class which has the most representatives within the nearest neighbors of the point. @@ -94,7 +94,19 @@ be accomplished through the ``weights`` keyword. The default value, distance from the query point. Alternatively, a user-defined function of the distance can be supplied which is used to compute the weights. - +The nearest neighbors classification algorithm is implicitly based on +probability theory: a query point :math:`x` is assigned to the class +:math:`C_k` to which it has the highest probability of belonging. This +*posterior probability* is computed using Bayes' rule: +:math:`P(C_k \mid x) = \frac{P(x \mid C_k) P(C_k)}{P(x)}`. +The basic nearest neighbors classification (when ``class_prior='default'``) +uses a default *prior probability* :math:`P(C_k)` equal to the proportion of +training points which belong to class :math:`C_k`. 
In contrast, using +a flat prior (``class_prior='flat'``) assigns the same value (1 over the +number of classes) to each class prior probability :math:`P(C_k)`. +Alternatively, a user-defined list of the class prior probabilities (in +increasing order of class labels) can be supplied which is used to classify +the query points. .. |classification_1| image:: ../auto_examples/neighbors/images/plot_classification_1.png :target: ../auto_examples/neighbors/plot_classification.html @@ -111,6 +123,11 @@ distance can be supplied which is used to compute the weights. * :ref:`example_neighbors_plot_classification.py`: an example of classification using nearest neighbors. +.. topic:: References: + + * `Pattern Recognition and Machine Learning`, + Bishop, C.M., New York: Springer (2006), p. 124-127 + .. _regression: Nearest Neighbors Regression @@ -118,7 +135,7 @@ Nearest Neighbors Regression Neighbors-based regression can be used in cases where the data labels are continuous rather than discrete variables. The label assigned to a query -point is computed based the mean of the labels of its nearest neighbors. +point is computed based on the mean of the labels of its nearest neighbors. scikit-learn implements two different neighbors regressors: :class:`KNeighborsRegressor` implements learning based on the :math:`k` diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index ca57af93b3eaa..1242bcca46c62 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -95,8 +95,8 @@ Scikit-learn documentation for more information about this type of classifier.) >>> from sklearn.neighbors import KNeighborsClassifier >>> knn = KNeighborsClassifier() >>> knn.fit(iris_X_train, iris_y_train) - KNeighborsClassifier(algorithm='auto', leaf_size=30, n_neighbors=5, p=2, - warn_on_equidistant=True, weights='uniform') + KNeighborsClassifier(algorithm='auto', class_prior='default', leaf_size=30, + n_neighbors=5, p=2, warn_on_equidistant=True, weights='uniform') >>> knn.predict(iris_X_test) array([1, 2, 1, 0, 0, 0, 2, 1, 2, 0]) >>> iris_y_test diff --git a/examples/neighbors/plot_classification.py b/examples/neighbors/plot_classification.py index 209820159b9ed..8bfd2bb5e77c6 100644 --- a/examples/neighbors/plot_classification.py +++ b/examples/neighbors/plot_classification.py @@ -27,9 +27,11 @@ cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF']) -for weights in ['uniform', 'distance']: +for weights, class_prior in zip(['uniform', 'distance'], + ['default', [0.8, 0.1, 0.1]]): # we create an instance of Neighbours Classifier and fit the data. - clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights) + clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights, + class_prior=class_prior) clf.fit(X, y) # Plot the decision boundary. 
For that, we will assign a color to each
     # point in the mesh [x_min, x_max]x[y_min, y_max].
@@ -47,8 +49,8 @@
     # Plot also the training points
     pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
-    pl.title("3-Class classification (k = %i, weights = '%s')"
-             % (n_neighbors, weights))
+    pl.title("3-Class classification (k = %i,\nweights = '%s', class_prior = '%s')"
+             % (n_neighbors, weights, class_prior))
     pl.axis('tight')
 
 pl.show()

From 4dec520bc1556b6da0e04ff2a737d60bbc658122 Mon Sep 17 00:00:00 2001
From: Martin Ledoux
Date: Sun, 29 Jul 2012 10:54:04 -0400
Subject: [PATCH 8/8] correct minor mistakes in documentation

---
 doc/modules/neighbors.rst | 10 ++++++++--
 sklearn/neighbors/base.py |  8 ++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst
index 0d85725aab83a..5faf71c2f9fcc 100644
--- a/doc/modules/neighbors.rst
+++ b/doc/modules/neighbors.rst
@@ -94,8 +94,8 @@ be accomplished through the ``weights`` keyword. The default value,
 distance from the query point. Alternatively, a user-defined function of the
 distance can be supplied which is used to compute the weights.
 
-The nearest neighbors classification algorithm is implicitly based on
-probability theory: a query point :math:`x` is assigned to the class
+There is a probabilistic interpretation of nearest neighbors classification:
+a query point :math:`x` is assigned to the class
 :math:`C_k` to which it has the highest probability of belonging. This
 *posterior probability* is computed using Bayes' rule:
 :math:`P(C_k \mid x) = \frac{P(x \mid C_k) P(C_k)}{P(x)}`.
@@ -108,6 +108,12 @@ Alternatively, a user-defined list of the class prior probabilities (in
 increasing order of class labels) can be supplied which is used to classify
 the query points.
 
+The second example below illustrates the effect of assigning a much greater
+prior probability (0.8) to the first class (in red) than the other two: in
+regions where few data points appear, for example around the point (7, 4.5),
+the model is more biased toward the red class than it was in the first
+example.
+
 .. |classification_1| image:: ../auto_examples/neighbors/images/plot_classification_1.png
    :target: ../auto_examples/neighbors/plot_classification.html
    :scale: 50
 
diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py
index e274f5f680397..8ca57f55f1654 100644
--- a/sklearn/neighbors/base.py
+++ b/sklearn/neighbors/base.py
@@ -46,14 +46,14 @@ def _get_weights(dist, weights):
     """Get the weights from an array of distances and a parameter ``weights``
 
     Parameters
-    ===========
+    ----------
     dist: ndarray
         The input distances
     weights: {'uniform', 'distance' or a callable}
         The kind of weighting used
 
     Returns
-    ========
+    -------
     weights_arr: array of the same shape as ``dist``
         if ``weights == 'uniform'``, then returns None
     """
@@ -83,14 +83,14 @@ def _get_class_prior(y, class_prior):
     """Get class prior from targets ``y`` and parameter ``class_prior``
 
    Parameters
-    ==========
+    ----------
     y : ndarray
         The target labels, from 0 to ``n-1`` (thus ``n`` classes)
     class_prior: {'default', 'flat', or a list or ndarray}
         The class prior probabilities to use
 
     Returns
-    =======
+    -------
     class_prior_arr: array of the same shape as ``np.unique(y)``
     """
     if class_prior in (None, 'default'):
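
Editor's note: a quick sanity check of the posterior arithmetic used throughout
this series. This is a sketch, not part of the patches; it assumes a
scikit-learn checkout with all eight patches applied, and it reuses the toy
data from the doctests above.

    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier

    X = [[0], [1], [2], [3]]
    y = [0, 0, 1, 1]

    # User-defined prior, one entry per class in increasing label order,
    # as in the doctest added by PATCH 5/8.
    neigh = KNeighborsClassifier(n_neighbors=3, class_prior=[0.75, 0.25])
    neigh.fit(X, y)
    print(neigh.predict_proba([[2.0]]))   # expected: [[ 0.6  0.4]]

    # The same result, computed by hand the way predict_proba does it:
    # the 3 nearest neighbors of 2.0 are the points 1, 2 and 3, with
    # labels [0, 1, 1], so class 0 gets 1 vote and class 1 gets 2.
    votes = np.array([1., 2.])
    class_count = np.bincount(y)                 # [2, 2]
    prior = np.array([0.75, 0.25])
    posterior = (votes / class_count) * prior    # [0.375, 0.25]
    posterior /= posterior.sum()                 # [0.6, 0.4]
    print(posterior)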