New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Issue #2559: Added normalize option to LogisticRegression #2567
Changes from 5 commits
cfdd9da
76f31ca
b6f262f
4fe4255
1b3c783
669c3b0
9e4458a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -585,15 +585,17 @@ class BaseLibLinear(six.with_metaclass(ABCMeta, BaseEstimator)): | |
|
||
@abstractmethod | ||
def __init__(self, penalty='l2', loss='l2', dual=True, tol=1e-4, C=1.0, | ||
multi_class='ovr', fit_intercept=True, intercept_scaling=1, | ||
class_weight=None, verbose=0, random_state=None): | ||
multi_class='ovr', fit_intercept=True, normalize=False, | ||
intercept_scaling=1, class_weight=None, verbose=0, | ||
random_state=None): | ||
|
||
self.penalty = penalty | ||
self.loss = loss | ||
self.dual = dual | ||
self.tol = tol | ||
self.C = C | ||
self.fit_intercept = fit_intercept | ||
self.normalize = normalize | ||
self.intercept_scaling = intercept_scaling | ||
self.multi_class = multi_class | ||
self.class_weight = class_weight | ||
|
@@ -660,7 +662,6 @@ def fit(self, X, y): | |
" one.") | ||
|
||
X = atleast2d_or_csr(X, dtype=np.float64, order="C") | ||
|
||
self.class_weight_ = compute_class_weight(self.class_weight, | ||
self.classes_, y) | ||
|
||
|
@@ -677,6 +678,12 @@ def fit(self, X, y): | |
|
||
# LibLinear wants targets as doubles, even for classification | ||
y = np.asarray(y, dtype=np.float64).ravel() | ||
|
||
# Center data if self.normalize | ||
if self.normalize: | ||
X_mean, X_std = np.mean(X, axis=0), np.std(X, axis=0) | ||
X = (X - X_mean) / X_std | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure there will be a consensus to merge this, but let me give feedback to help: doing this will break on sparse data. Maybe preventing `normalize` on sparse data is the way to go. Also, use the in-place form `X -= X_mean`; this will prevent a reallocation of the size of X. |
||
|
||
self.raw_coef_ = liblinear.train_wrap(X, y, | ||
sp.isspmatrix(X), | ||
self._get_solver_type(), | ||
|
@@ -696,6 +703,10 @@ def fit(self, X, y): | |
self.coef_ = self.raw_coef_ | ||
self.intercept_ = 0. | ||
|
||
if self.normalize: | ||
self.coef_ = self.coef_ / X_std | ||
self.intercept_ = self.intercept_ - np.dot(X_mean, self.coef_.T) | ||
|
||
if self.multi_class == "crammer_singer" and len(self.classes_) == 2: | ||
self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1) | ||
if self.fit_intercept: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This line is too long — consider using a PEP 8 checker in your editor.
Also, a test should be added:
it should verify that prediction on an array that has already been normalized gives the same result as prediction with the `normalize` option enabled.