
Refactor MultinomialNB and BernoulliNB: introduce BaseDiscreteNB

Maybe GaussianNB should share some of the code as well (?)
larsmans committed Jun 10, 2011
1 parent f035f4e commit 794605a90074ce041c6384b0a4a343ad8a49c858
Showing 1 changed file with 107 additions and 116 deletions.
+107 −116 scikits/learn/naive_bayes.py
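
For orientation before the diff: a minimal sketch of the template-method pattern this commit introduces. The shared base class owns fit and predict, while each concrete classifier supplies _count and _joint_log_likelihood. This is a simplified, dense-only illustration (alpha fixed at 1, uniform class prior), not the library code shown below.

    import numpy as np

    class BaseDiscreteNB(object):
        """Shared fit/predict; subclasses supply _count() and
        _joint_log_likelihood(), mirroring the diff below."""

        def fit(self, X, y):
            self.unique_y = np.unique(y)
            # one-hot label indicator matrix Y, shape [n_samples, n_classes]
            Y = (y[:, None] == self.unique_y[None, :]).astype(np.float64)
            N_c, N_c_i = self._count(X, Y)
            # Laplace smoothing (alpha=1) and a uniform class prior
            self.coef_ = np.log(N_c_i + 1.0) - np.log(N_c + X.shape[1])[:, None]
            self.intercept_ = np.repeat(-np.log(len(self.unique_y)),
                                        len(self.unique_y))
            return self

        def predict(self, X):
            jll = self._joint_log_likelihood(X)
            return self.unique_y[np.argmax(jll, axis=0)]

    class MultinomialNB(BaseDiscreteNB):
        @staticmethod
        def _count(X, Y):
            N_c_i = np.dot(Y.T, X)   # count of feature i in samples of class c
            return N_c_i.sum(axis=1), N_c_i

        def _joint_log_likelihood(self, X):
            return np.dot(self.coef_, X.T) + self.intercept_[:, None]

With X a dense count matrix and y an integer label vector, MultinomialNB().fit(X, y).predict(X) runs end to end under these assumptions.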
@@ -209,77 +209,9 @@ def atleast2d_or_csr(X):
return np.atleast_2d(X)
-class MultinomialNB(BaseEstimator, ClassifierMixin):
- """
- Naive Bayes classifier for multinomial models
-
- The multinomial Naive Bayes classifier is suitable for classification with
- discrete features (e.g., word counts for text classification). The
- multinomial distribution normally requires integer feature counts. However,
- in practice, fractional counts such as tf-idf may also work.
-
- This class is designed to handle both dense and sparse data; it will enter
- "sparse mode" if its training matrix (X) is a sparse matrix.
-
- Parameters
- ----------
- alpha: float, optional (default=1.0)
- Additive (Laplace/Lidstone) smoothing parameter
- (0 for no smoothing).
- fit_prior: boolean
- Whether to learn class prior probabilities or not.
- If false, a uniform prior will be used.
-
- Methods
- -------
- fit(X, y) : self
- Fit the model
-
- predict(X) : array
- Predict using the model.
-
- predict_proba(X) : array
- Predict the probability of each class using the model.
-
- predict_log_proba(X) : array
- Predict the log probability of each class using the model.
-
- Attributes
- ----------
- `class_log_prior_`, `intercept_` : array, shape = [n_classes]
- Log probability of each class (smoothed).
-
- `feature_log_prob_`, `coef_` : array, shape = [n_classes, n_features]
- Empirical log probability of features given a class, P(x_i|y).
-
- (`class_log_prior_` and `feature_log_prob_` are properties referring to
- `intercept_` and `coef_`, respectively.)
-
- Examples
- --------
- >>> import numpy as np
- >>> X = np.random.randint(5, size=(6, 100))
- >>> Y = np.array([1, 2, 3, 4, 5, 6])
- >>> from scikits.learn.naive_bayes import MultinomialNB
- >>> clf = MultinomialNB()
- >>> clf.fit(X, Y)
- MultinomialNB(alpha=1.0, fit_prior=True)
- >>> print clf.predict(X[2])
- [3]
-
- References
- ----------
- For the rationale behind the names `coef_` and `intercept_`, i.e.
- naive Bayes as a linear classifier, see J. Rennie et al. (2003),
- Tackling the poor assumptions of naive Bayes text classifiers, ICML.
- """
-
- def __init__(self, alpha=1.0, fit_prior=True):
- self.alpha = alpha
- self.fit_prior = fit_prior
-
+class BaseDiscreteNB(BaseEstimator, ClassifierMixin):
def fit(self, X, y, class_prior=None):
- """Fit Multinomial Naive Bayes according to X, y
+ """Fit Naive Bayes classifier according to X, y
Parameters
----------
@@ -327,19 +259,6 @@ def fit(self, X, y, class_prior=None):
return self
- @staticmethod
- def _count(X, Y):
- """Count feature occurrences.
-
- Returns (N_c, N_c_i), where
- N_c is the count of all features in all samples of class c;
- N_c_i is the count of feature i in all samples of class c.
- """
- N_c_i = safe_sparse_dot(Y.T, X)
- N_c = np.sum(N_c_i, axis=1)
-
- return N_c, N_c_i
-
class_log_prior_ = property(lambda self: self.intercept_)
feature_log_prob_ = property(lambda self: self.coef_)
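
The two properties kept above expose the naive Bayes parameters under linear-model names. As the Rennie et al. reference in the docstring suggests, prediction then reduces to a linear decision function. A small dense-numpy illustration with made-up fitted parameters:

    import numpy as np

    # hypothetical fitted parameters: coef_ is [n_classes, n_features],
    # intercept_ is [n_classes]; both live in log space
    coef_ = np.log(np.array([[0.7, 0.3],
                             [0.2, 0.8]]))
    intercept_ = np.log(np.array([0.5, 0.5]))

    X = np.array([[3.0, 1.0],    # mostly feature 0 -> class 0
                  [0.0, 4.0]])   # mostly feature 1 -> class 1

    # joint log likelihood: exactly a linear classifier's decision function
    jll = np.dot(coef_, X.T) + intercept_[:, None]
    print(jll.argmax(axis=0))    # [0 1]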
@@ -360,14 +279,6 @@ def predict(self, X):
return y_pred
- def _joint_log_likelihood(self, X):
- """Calculate the posterior log probability of the samples X"""
-
- X = atleast2d_or_csr(X)
-
- jll = safe_sparse_dot(self.coef_, X.T)
- return jll + np.atleast_2d(self.intercept_).T
-
def predict_proba(self, X):
"""
Return probability estimates for the test vector X.
@@ -406,7 +317,98 @@ def predict_log_proba(self, X):
return (jll - log_prob_x).T
-class BernoulliNB(MultinomialNB):
+class MultinomialNB(BaseDiscreteNB):
+ """
+ Naive Bayes classifier for multinomial models
+
+ The multinomial Naive Bayes classifier is suitable for classification with
+ discrete features (e.g., word counts for text classification). The
+ multinomial distribution normally requires integer feature counts. However,
+ in practice, fractional counts such as tf-idf may also work.
+
+ This class is designed to handle both dense and sparse data; it will enter
+ "sparse mode" if its training matrix (X) is a sparse matrix.
+
+ Parameters
+ ----------
+ alpha: float, optional (default=1.0)
+ Additive (Laplace/Lidstone) smoothing parameter
+ (0 for no smoothing).
+ fit_prior: boolean
+ Whether to learn class prior probabilities or not.
+ If false, a uniform prior will be used.
+
+ Methods
+ -------
+ fit(X, y) : self
+ Fit the model
+
+ predict(X) : array
+ Predict using the model.
+
+ predict_proba(X) : array
+ Predict the probability of each class using the model.
+
+ predict_log_proba(X) : array
+ Predict the log probability of each class using the model.
+
+ Attributes
+ ----------
+ `class_log_prior_`, `intercept_` : array, shape = [n_classes]
+ Log probability of each class (smoothed).
+
+ `feature_log_prob_`, `coef_` : array, shape = [n_classes, n_features]
+ Empirical log probability of features given a class, P(x_i|y).
+
+ (`class_log_prior_` and `feature_log_prob_` are properties referring to
+ `intercept_` and `coef_`, respectively.)
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> X = np.random.randint(5, size=(6, 100))
+ >>> Y = np.array([1, 2, 3, 4, 5, 6])
+ >>> from scikits.learn.naive_bayes import MultinomialNB
+ >>> clf = MultinomialNB()
+ >>> clf.fit(X, Y)
+ MultinomialNB(alpha=1.0, fit_prior=True)
+ >>> print clf.predict(X[2])
+ [3]
+
+ References
+ ----------
+ For the rationale behind the names `coef_` and `intercept_`, i.e.
+ naive Bayes as a linear classifier, see J. Rennie et al. (2003),
+ Tackling the poor assumptions of naive Bayes text classifiers, ICML.
+ """
+
+ def __init__(self, alpha=1.0, fit_prior=True):
+ self.alpha = alpha
+ self.fit_prior = fit_prior
+
+ @staticmethod
+ def _count(X, Y):
+ """Count feature occurrences.
+
+ Returns (N_c, N_c_i), where
+ N_c is the count of all features in all samples of class c;
+ N_c_i is the count of feature i in all samples of class c.
+ """
+ N_c_i = safe_sparse_dot(Y.T, X)
+ N_c = np.sum(N_c_i, axis=1)
+
+ return N_c, N_c_i
+
+ def _joint_log_likelihood(self, X):
+ """Calculate the posterior log probability of the samples X"""
+
+ X = atleast2d_or_csr(X)
+
+ jll = safe_sparse_dot(self.coef_, X.T)
+ return jll + np.atleast_2d(self.intercept_).T
+
+
+class BernoulliNB(BaseDiscreteNB):
"""Naive Bayes classifier for multivariate Bernoulli models.
Like MultinomialNB, this classifier is suitable for discrete data. The
@@ -449,8 +451,8 @@ class BernoulliNB(MultinomialNB):
`feature_log_prob_`, `coef_` : array, shape = [n_classes, n_features]
Empirical log probability of features given a class, P(x_i|y).
- (`class_log_prior_` and `feature_log_prob_` are properties referring to
- `intercept_` and `coef_`, respectively.)
+ (`class_log_prior_` and `feature_log_prob_` are properties referring to
+ `intercept_` and `coef_`, respectively.)
Examples
--------
@@ -460,7 +462,7 @@ class BernoulliNB(MultinomialNB):
>>> from scikits.learn.naive_bayes import BernoulliNB
>>> clf = BernoulliNB()
>>> clf.fit(X, Y)
- BernoulliNB(alpha=1.0, fit_prior=True)
+ BernoulliNB(binarize=0.0, alpha=1.0, fit_prior=True)
>>> print clf.predict(X[2])
[3]
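
The repr change above comes from the new binarize parameter: the threshold BernoulliNB applies to its input features before counting, so that each feature becomes a Bernoulli indicator. A plain-numpy sketch of that preprocessing step (the library uses its own binarize helper inside _count, as the last hunk shows):

    import numpy as np

    def binarize_features(X, threshold=0.0):
        # 1 where the feature value exceeds the threshold, else 0
        return (X > threshold).astype(np.float64)

    X = np.array([[0, 2, 0],
                  [1, 0, 3]])
    print(binarize_features(X))
    # [[0. 1. 0.]
    #  [1. 0. 1.]]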
@@ -480,32 +482,21 @@ class BernoulliNB(MultinomialNB):
def __init__(self, alpha=1.0, binarize=.0, fit_prior=True):
self.alpha = alpha
self.binarize = binarize
- self.fit_prior = True
-
- def fit(self, X, y, class_prior=None):
- """Fit Bernoulli Naive Bayes according to X, y
-
- Parameters
- ----------
- X : array-like, shape = [n_samples, n_features]
- Training vectors, where n_samples is the number of samples and
- n_features is the number of features. X may be a sparse matrix.
-
- y : array-like, shape = [n_samples]
- Target values.
+ self.fit_prior = fit_prior
- class_prior : array, shape [n_classes]
- Custom prior probability per class.
- Overrides the fit_prior parameter.
+ def _count(self, X, Y):
+ """Count feature occurrences.
- Returns
- -------
- self : object
- Returns self.
+ Returns (N_c, N_c_i), where
+ N_c is the count of all features in all samples of class c;
+ N_c_i is the count of feature i in all samples of class c.
"""
if self.binarize is not None:
X = binarize(X, threshold=self.binarize)
- return super(BernoulliNB, self).fit(X, y, class_prior)
+ N_c_i = safe_sparse_dot(Y.T, X)
+ N_c = np.sum(N_c_i, axis=1)
+
+ return N_c, N_c_i
def _joint_log_likelihood(self, X):
X = atleast2d_or_csr(X)

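To make the counting step concrete, a small dense-array walkthrough of what _count computes; with a sparse X, safe_sparse_dot gives the same result:

    import numpy as np

    X = np.array([[2, 0, 1],     # sample 0, class 0
                  [1, 1, 0],     # sample 1, class 0
                  [0, 3, 0]])    # sample 2, class 1
    Y = np.array([[1, 0],        # one-hot label indicator matrix
                  [1, 0],
                  [0, 1]])

    N_c_i = np.dot(Y.T, X)       # feature i counted over samples of class c
    N_c = N_c_i.sum(axis=1)      # total feature count per class

    print(N_c_i)   # [[3 1 1]
                   #  [0 3 0]]
    print(N_c)     # [5 3]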