Skip to content

Commit

Permalink
Issue #2559: Corrected how normalize works in BaseLibLinear
Browse files Browse the repository at this point in the history
  • Loading branch information
ilblackdragon committed Nov 9, 2013
1 parent cfdd9da commit 76f31ca
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 14 deletions.
9 changes: 1 addition & 8 deletions sklearn/linear_model/logistic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np

from .base import LinearClassifierMixin, SparseCoefMixin, center_data
from .base import LinearClassifierMixin, SparseCoefMixin
from ..feature_selection.from_model import _LearntSelectorMixin
from ..svm.base import BaseLibLinear

Expand Down Expand Up @@ -142,10 +142,3 @@ def predict_log_proba(self, X):
model, where classes are ordered as they are in ``self.classes_``.
"""
return np.log(self.predict_proba(X))

def _center_data(self, X, y, fit_intercept, normalize=False):
    """Center the data in X but leave y untouched.

    Delegates to ``center_data`` and keeps only the X-related pieces of
    its result; the target values handed in are passed straight through.

    Parameters
    ----------
    X : input data, forwarded to ``center_data``.
    y : target values, forwarded to ``center_data`` and also returned
        unchanged.
    fit_intercept : forwarded positionally to ``center_data``.
    normalize : forwarded to ``center_data`` as the ``normalize`` keyword.

    Returns
    -------
    tuple
        ``(X, y, X_mean, y, X_std)`` where ``X``, ``X_mean`` and ``X_std``
        are the first, third and fifth values produced by ``center_data``.
        The original ``y`` fills both the second and the fourth slot, so
        callers unpacking five values receive ``y`` itself where a
        ``y_mean`` would otherwise be.
    """
    # center_data yields five values; the two y-related ones are discarded
    # on purpose so that the targets are never shifted.
    (X_centered,
     _ignored_y,
     X_mean,
     _ignored_y_mean,
     X_std) = center_data(X, y, fit_intercept, normalize=normalize)
    return X_centered, y, X_mean, y, X_std

13 changes: 7 additions & 6 deletions sklearn/svm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from ..utils.fixes import unique
from ..utils.extmath import safe_sparse_dot
from ..externals import six
from ..linear_model.base import center_data


LIBSVM_IMPL = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr']
Expand Down Expand Up @@ -606,8 +605,6 @@ def __init__(self, penalty='l2', loss='l2', dual=True, tol=1e-4, C=1.0,
# Check that the arguments given are valid:
self._get_solver_type()

_center_data = staticmethod(center_data)

def _get_solver_type(self):
"""Find the liblinear magic number for the solver.
Expand Down Expand Up @@ -683,9 +680,9 @@ def fit(self, X, y):
y = np.asarray(y, dtype=np.float64).ravel()

# Center data if self.normalize
X, y, X_mean, y_mean, X_std = self._center_data(X, y,
self.fit_intercept,
self.normalize)
if self.normalize:
X_mean, X_std = np.mean(X), np.std(X)
X = (X - X_mean) / X_std

self.raw_coef_ = liblinear.train_wrap(X, y,
sp.isspmatrix(X),
Expand All @@ -706,6 +703,10 @@ def fit(self, X, y):
self.coef_ = self.raw_coef_
self.intercept_ = 0.

if self.normalize:
self.coef_ = self.coef_ / X_std
self.intercept_ = self.intercept_ - np.dot(X_mean, self.coef_.T)

if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)
if self.fit_intercept:
Expand Down

0 comments on commit 76f31ca

Please sign in to comment.