
Commit

Merge ccf8ec2 into 57aa05a
adam2392 committed Jul 1, 2020
2 parents 57aa05a + ccf8ec2 commit ced8072
Showing 3 changed files with 70 additions and 8 deletions.
2 changes: 1 addition & 1 deletion docs/sources/CHANGELOG.md
@@ -19,7 +19,7 @@ The CHANGELOG for the current development version is available at

##### New Features

- -
- Add `predict_proba` kwarg to bootstrap methods, to allow bootstrapping of scoring functions that take in probability values. ([#700](https://github.com/rasbt/mlxtend/pull/700) via [Adam Li](https://github.com/adam2392))

##### Changes

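As a quick illustration of the changelog entry above, here is a minimal sketch of the new kwarg, mirroring the usage exercised in the tests further down (the two-class data subset is illustrative):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

from mlxtend.data import iris_data
from mlxtend.evaluate import bootstrap_point632_score

X, y = iris_data()

# restrict to the first two classes so roc_auc_score sees binary labels
lr = LogisticRegression(solver='liblinear', multi_class='ovr')
scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                  scoring_func=roc_auc_score,
                                  predict_proba=True,  # pass probabilities, not labels
                                  random_seed=123)
print(np.round(np.mean(scores), 2))
```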
39 changes: 36 additions & 3 deletions mlxtend/evaluate/bootstrap_point632.py
@@ -44,6 +44,7 @@ def mse(targets, predictions):

def bootstrap_point632_score(estimator, X, y, n_splits=200,
method='.632', scoring_func=None,
predict_proba=False,
random_seed=None,
clone_estimator=True):
"""
@@ -93,6 +94,17 @@ def bootstrap_point632_score(estimator, X, y, n_splits=200,
estimator is a classifier and mean squared error
if the estimator is a regressor.
predict_proba : bool (default=False)
Whether to use the `predict_proba` method of the
`estimator`. Use this in conjunction with a
`scoring_func` that takes probability values
instead of class-label predictions.
For example, if `scoring_func` is
:meth:`sklearn.metrics.roc_auc_score`, set
`predict_proba=True`.
Note that this requires the `estimator` to
implement a `predict_proba` method.
random_seed : int (default=None)
If int, random_seed is the seed used by
the random number generator.
@@ -153,21 +165,42 @@ def bootstrap_point632_score(estimator, X, y, n_splits=200,
raise AttributeError('Estimator type undefined. '
'Please provide a scoring_func argument.')

# determine which prediction function to use:
# label prediction or probability prediction
if not predict_proba:
predict_func = cloned_est.predict
else:
if not getattr(cloned_est, 'predict_proba', None):
raise RuntimeError(f'The estimator {cloned_est} does not '
f'support predicting probabilities via '
f'`predict_proba` function.')
predict_func = cloned_est.predict_proba

oob = BootstrapOutOfBag(n_splits=n_splits, random_seed=random_seed)
scores = np.empty(dtype=float, shape=(n_splits,))
cnt = 0
for train, test in oob.split(X):
cloned_est.fit(X[train], y[train])

test_acc = scoring_func(y[test], cloned_est.predict(X[test]))
# predict labels or probabilities on the test and train sets;
# for binary classification, keep only the positive-class column
predicted_test_val = predict_func(X[test])
predicted_train_val = predict_func(X[train])
if predict_proba:
num_classes = len(np.unique(y))

if num_classes == 2:
predicted_train_val = predicted_train_val[:, 1]
predicted_test_val = predicted_test_val[:, 1]

test_acc = scoring_func(y[test], predicted_test_val)

if method == 'oob':
acc = test_acc

else:
test_err = 1 - test_acc
train_err = 1 - scoring_func(y[train],
cloned_est.predict(X[train]))
train_err = 1 - scoring_func(y[train], predicted_train_val)
if method == '.632+':
gamma = 1 - (no_information_rate(
y,
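For context on the positive-class slicing in the hunk above: `predict_proba` returns an array of shape `(n_samples, n_classes)`, and binary metrics such as `roc_auc_score` expect only the column for class 1. A standalone sketch (the toy data is illustrative):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[0.0], [0.2], [0.8], [1.0]])
y = np.array([0, 0, 1, 1])

clf = LogisticRegression().fit(X, y)
proba = clf.predict_proba(X)  # shape (4, 2): P(class 0) and P(class 1) per row
positive = proba[:, 1]        # the same slice as `predicted_test_val[:, 1]` above
print(proba.shape, positive)
```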
37 changes: 33 additions & 4 deletions mlxtend/evaluate/tests/test_bootstrap_point632.py
@@ -5,15 +5,23 @@
# License: BSD 3 clause

import numpy as np
from mlxtend.evaluate import bootstrap_point632_score
from mlxtend.utils import assert_raises
from mlxtend.data import iris_data
import pytest
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from mlxtend.data import iris_data
from mlxtend.evaluate import bootstrap_point632_score
from mlxtend.utils import assert_raises

X, y = iris_data()


class FakeClassifier(BaseEstimator):
def __init__(self):
pass


def test_defaults():
lr = LogisticRegression(solver='liblinear', multi_class='ovr')
scores = bootstrap_point632_score(lr, X, y, random_seed=123)
@@ -63,10 +71,10 @@ def test_632plus():


def test_custom_accuracy():

def accuracy2(targets, predictions):
return sum([i == j for i, j in
zip(targets, predictions)]) / len(targets)

lr = LogisticRegression(solver='liblinear', multi_class='ovr')
scores = bootstrap_point632_score(lr, X, y,
random_seed=123,
@@ -121,3 +129,24 @@ def test_scoring():
f1 = np.mean(scores)
assert len(scores) == 200
assert np.round(f1, 2) == 1.0, f1


def test_scoring_proba():
from sklearn.metrics import roc_auc_score
lr = LogisticRegression(solver='liblinear', multi_class='ovr')

# test predict_proba
scores = bootstrap_point632_score(lr, X[:100], y[:100],
scoring_func=roc_auc_score,
predict_proba=True,
random_seed=123)
roc_auc = np.mean(scores)
assert len(scores) == 200
assert np.round(roc_auc, 2) == 1.0, roc_auc

with pytest.raises(RuntimeError):
clf = FakeClassifier()
scores = bootstrap_point632_score(clf, X[:100], y[:100],
scoring_func=roc_auc_score,
predict_proba=True,
random_seed=123)
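
The RuntimeError path tested above can also be reproduced directly; a hedged sketch with a hypothetical estimator that, like `FakeClassifier`, lacks `predict_proba`:

```python
from sklearn.base import BaseEstimator
from sklearn.metrics import roc_auc_score

from mlxtend.data import iris_data
from mlxtend.evaluate import bootstrap_point632_score


class NoProbaClassifier(BaseEstimator):  # hypothetical: defines no predict_proba
    def fit(self, X, y):
        return self


X, y = iris_data()

try:
    bootstrap_point632_score(NoProbaClassifier(), X[:100], y[:100],
                             scoring_func=roc_auc_score,
                             predict_proba=True,
                             random_seed=123)
except RuntimeError as err:
    print(err)  # the estimator does not support `predict_proba`
```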
