New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MRG + 1] Fix for OvR partial_fit bug #7786
Changes from 3 commits
ed08d38
cd527ee
6f27790
c428554
5fd925a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,8 @@ | |
from sklearn.svm import LinearSVC, SVC | ||
from sklearn.naive_bayes import MultinomialNB | ||
from sklearn.linear_model import (LinearRegression, Lasso, ElasticNet, Ridge, | ||
Perceptron, LogisticRegression) | ||
Perceptron, LogisticRegression, | ||
SGDClassifier) | ||
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor | ||
from sklearn.model_selection import GridSearchCV, cross_val_score | ||
from sklearn.pipeline import Pipeline | ||
|
@@ -89,16 +90,39 @@ def test_ovr_partial_fit(): | |
assert_greater(np.mean(y == pred), 0.65) | ||
|
||
# Test when mini batches doesn't have all classes | ||
# with MultinomialNB | ||
X = np.abs(np.random.randn(14, 2)) | ||
y = [1, 1, 1, 1, 2, 3, 3, 0, 0, 2, 3, 1, 2, 3] | ||
ovr = OneVsRestClassifier(MultinomialNB()) | ||
ovr.partial_fit(iris.data[:60], iris.target[:60], np.unique(iris.target)) | ||
ovr.partial_fit(iris.data[60:], iris.target[60:]) | ||
pred = ovr.predict(iris.data) | ||
ovr2 = OneVsRestClassifier(MultinomialNB()) | ||
pred2 = ovr2.fit(iris.data, iris.target).predict(iris.data) | ||
ovr.partial_fit(X[:7], y[:7], np.unique(y)) | ||
ovr.partial_fit(X[7:], y[7:]) | ||
pred = ovr.predict(X) | ||
ovr1 = OneVsRestClassifier(MultinomialNB()) | ||
pred1 = ovr1.fit(X, y).predict(X) | ||
assert_equal(np.mean(pred == y), np.mean(pred1 == y)) | ||
|
||
# Test when mini batches doesn't have all classes | ||
# with SGDClassifier | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

As a reader of the test, it's not clear why you would want to test with both these base estimators. Also: can we do it in a loop, for clarity that they're the same test?

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Yes, happy for it to just be |
||
ovr = OneVsRestClassifier(SGDClassifier(n_iter=1, shuffle=False, | ||
random_state=0)) | ||
ovr.partial_fit(X[:7], y[:7], np.unique(y)) | ||
ovr.partial_fit(X[7:], y[7:]) | ||
pred = ovr.predict(X) | ||
ovr1 = OneVsRestClassifier(SGDClassifier(n_iter=1, shuffle=False, | ||
random_state=0)) | ||
pred1 = ovr1.fit(X, y).predict(X) | ||
assert_equal(np.mean(pred == y), np.mean(pred1 == y)) | ||
|
||
assert_almost_equal(pred, pred2) | ||
assert_equal(len(ovr.estimators_), len(np.unique(iris.target))) | ||
assert_greater(np.mean(iris.target == pred), 0.65) | ||
|
||
def test_ovr_partial_fit_exceptions(): | ||
ovr = OneVsRestClassifier(MultinomialNB()) | ||
X = np.abs(np.random.randn(14, 2)) | ||
y = [1, 1, 1, 1, 2, 3, 3, 0, 0, 2, 3, 1, 2, 3] | ||
ovr.partial_fit(X[:7], y[:7], np.unique(y)) | ||
# A new class value which was not in the first call of partial_fit | ||
# It should raise ValueError | ||
y1 = [5] + y[7:-1] | ||
assert_raises(ValueError, ovr.partial_fit, X=X[7:], y=y1) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Can you use assert_raises_regex to check the error message as well?

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

@lesteve Done!

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

But then you can drop this line. |
||
|
||
|
||
def test_ovr_ovo_regressor(): | ||
|
@@ -204,7 +228,6 @@ def test_ovr_multiclass(): | |
for base_clf in (MultinomialNB(), LinearSVC(random_state=0), | ||
LinearRegression(), Ridge(), | ||
ElasticNet()): | ||
|
||
clf = OneVsRestClassifier(base_clf).fit(X, y) | ||
assert_equal(set(clf.classes_), classes) | ||
y_pred = clf.predict(np.array([[0, 0, 4]]))[0] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For large `y`, iteration may be much slower than using `np.setdiff1d`...?

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, `np.setdiff1d` seems to be faster.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Due to the use of `np.setdiff1d`, it is not possible to `partial_fit` with a sparse `y`.