Issue #2559: Corrected how normalize works in BaseLibLinear

scikit-learn · Nov 9, 2013 · 76f31ca
1 parent cfdd9da
commit 76f31ca
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 14 deletions.
diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from .base import LinearClassifierMixin, SparseCoefMixin, center_data
+from .base import LinearClassifierMixin, SparseCoefMixin
 from ..feature_selection.from_model import _LearntSelectorMixin
 from ..svm.base import BaseLibLinear
 
@@ -142,10 +142,3 @@ def predict_log_proba(self, X):
             model, where classes are ordered as they are in ``self.classes_``.
         """
         return np.log(self.predict_proba(X))
-
-    def _center_data(self, X, y, fit_intercept, normalize=False):
-        """Center the data in X but not in y"""
-        X, _, X_mean, _, X_std = center_data(X, y, fit_intercept,
-                                            normalize=normalize)
-        return X, y, X_mean, y, X_std
-
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
@@ -14,7 +14,6 @@
 from ..utils.fixes import unique
 from ..utils.extmath import safe_sparse_dot
 from ..externals import six
-from ..linear_model.base import center_data
 
 
 LIBSVM_IMPL = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr']
@@ -606,8 +605,6 @@ def __init__(self, penalty='l2', loss='l2', dual=True, tol=1e-4, C=1.0,
         # Check that the arguments given are valid:
         self._get_solver_type()
 
-    _center_data = staticmethod(center_data)
-
     def _get_solver_type(self):
         """Find the liblinear magic number for the solver.
 
@@ -683,9 +680,9 @@ def fit(self, X, y):
         y = np.asarray(y, dtype=np.float64).ravel()
 
         # Center data if self.normalize
-        X, y, X_mean, y_mean, X_std = self._center_data(X, y,
-                                                        self.fit_intercept,
-                                                        self.normalize)
+        if self.normalize:
+            X_mean, X_std = np.mean(X), np.std(X)
+            X = (X - X_mean) / X_std
 
         self.raw_coef_ = liblinear.train_wrap(X, y,
                                               sp.isspmatrix(X),
@@ -706,6 +703,10 @@ def fit(self, X, y):
             self.coef_ = self.raw_coef_
             self.intercept_ = 0.
 
+        if self.normalize:
+            self.coef_ = self.coef_ / X_std
+            self.intercept_ = self.intercept_ - np.dot(X_mean, self.coef_.T)
+
         if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
             self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)
             if self.fit_intercept: