
Commit

paired_ttest_kfold_cv
rasbt committed Jan 19, 2018
1 parent a33b294 commit 845e07c
Showing 3 changed files with 218 additions and 1 deletion.
4 changes: 3 additions & 1 deletion mlxtend/evaluate/__init__.py
@@ -16,11 +16,13 @@
from .permutation import permutation_test
from .cochrans_q import cochrans_q
from .ttest import paired_ttest_resampled
from .ttest import paired_ttest_kfold_cv


__all__ = ["scoring", "confusion_matrix",
           "mcnemar_table", "mcnemar_tables",
           "mcnemar", "lift_score",
           "bootstrap", "permutation_test",
           "BootstrapOutOfBag", "bootstrap_point632_score",
           "cochrans_q", "paired_ttest_resampled",
           "paired_ttest_kfold_cv"]
114 changes: 114 additions & 0 deletions mlxtend/evaluate/tests/test_paired_ttest_kfold.py
@@ -0,0 +1,114 @@
# Sebastian Raschka 2014-2018
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

from mlxtend.evaluate import paired_ttest_kfold_cv
from mlxtend.data import iris_data
from mlxtend.data import boston_housing_data
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split


def test_classifier_defaults():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 random_seed=1)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    # change max_depth of the decision tree classifier

    clf2 = DecisionTreeClassifier(max_depth=1, random_state=1)

    score3 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score3, 2) == 0.63

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 random_seed=1)

    assert round(t, 3) == 13.491, t
    assert round(p, 3) == 0.000, p


def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 scoring='accuracy',
                                 random_seed=1)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 scoring='f1_macro',
                                 random_seed=1)

    assert round(t, 3) == -1.872, t
    assert round(p, 3) == 0.094, p


def test_regressor():
    X, y = boston_housing_data()
    reg1 = Lasso(random_state=1)
    reg2 = Ridge(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = reg1.fit(X_train, y_train).score(X_test, y_test)
    score2 = reg2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.66, score1
    assert round(score2, 2) == 0.68, score2

    t, p = paired_ttest_kfold_cv(estimator1=reg1,
                                 estimator2=reg2,
                                 X=X, y=y,
                                 random_seed=1)

    assert round(t, 3) == -0.549, t
    assert round(p, 3) == 0.596, p
101 changes: 101 additions & 0 deletions mlxtend/evaluate/ttest.py
@@ -8,6 +8,7 @@
import numpy as np
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import get_scorer


@@ -115,3 +116,103 @@ def paired_ttest_resampled(estimator1, estimator2, X, y,

    pvalue = stats.t.sf(np.abs(t_stat), num_rounds - 1)*2.
    return float(t_stat), float(pvalue)


def paired_ttest_kfold_cv(estimator1, estimator2, X, y,
                          cv=10,
                          scoring=None,
                          shuffle=False,
                          random_seed=None):
    """
    Implements the k-fold paired t-test procedure
    to compare the performance of two models.

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor
    estimator2 : scikit-learn classifier or regressor
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape = [n_samples]
        Target values.
    cv : int (default: 10)
        Number of splits (folds) for the
        cross-validation procedure.
    scoring : str, callable, or None (default: None)
        If None (default), uses 'accuracy' for sklearn classifiers
        and 'r2' for sklearn regressors.
        If str, uses a sklearn scoring metric string identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        If a callable object or function is provided, it has to conform to
        sklearn's signature ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.
    shuffle : bool (default: False)
        Whether to shuffle the dataset for generating
        the k-fold splits.
    random_seed : int or None (default: None)
        Random seed for shuffling the dataset
        for generating the k-fold splits.
        Ignored if shuffle=False.

    Returns
    ----------
    t : float
        The t-statistic
    pvalue : float
        Two-tailed p-value.
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.
    """

    kf = KFold(n_splits=cv, random_state=random_seed, shuffle=shuffle)

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    score_diff = []

    # refit both estimators on each training fold and record the
    # difference of their test-fold scores
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        estimator1.fit(X_train, y_train)
        estimator2.fit(X_train, y_train)

        est1_score = scorer(estimator1, X_test, y_test)
        est2_score = scorer(estimator2, X_test, y_test)
        score_diff.append(est1_score - est2_score)

    avg_diff = np.mean(score_diff)

    # t-statistic of the k-fold cross-validated paired t-test
    numerator = avg_diff * np.sqrt(cv)
    denominator = np.sqrt(sum([(diff - avg_diff)**2 for diff in score_diff])
                          / (cv - 1))
    t_stat = numerator / denominator

    # two-tailed p-value from a t-distribution with cv - 1 degrees of freedom
    pvalue = stats.t.sf(np.abs(t_stat), cv - 1)*2.
    return float(t_stat), float(pvalue)
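
For reference, the statistic computed above is the k-fold cross-validated paired t-test (see, e.g., Dietterich, 1998). A sketch of the math the code implements, with d_i denoting the score difference observed on the i-th of the k = cv test folds:

    \bar{d} = \frac{1}{k} \sum_{i=1}^{k} d_i,
    \qquad
    t = \frac{\bar{d}\,\sqrt{k}}{\sqrt{\frac{1}{k-1} \sum_{i=1}^{k} \left(d_i - \bar{d}\right)^2}}

Under the null hypothesis that both models perform equally well, t approximately follows a t-distribution with k - 1 degrees of freedom, which is where the two-tailed p-value in the last line comes from.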

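As a quick illustration of the new API, here is a minimal usage sketch (illustration only, not part of the commit; it assumes mlxtend at this commit and scikit-learn are installed):

# Minimal usage sketch for paired_ttest_kfold_cv (illustration only,
# not part of this commit).
from mlxtend.data import iris_data
from mlxtend.evaluate import paired_ttest_kfold_cv
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = iris_data()
clf1 = LogisticRegression(random_state=1)
clf2 = DecisionTreeClassifier(random_state=1)

# 10-fold cross-validated paired t-test on accuracy; random_seed
# takes effect here only because shuffle=True
t, p = paired_ttest_kfold_cv(estimator1=clf1,
                             estimator2=clf2,
                             X=X, y=y,
                             cv=10,
                             scoring='accuracy',
                             shuffle=True,
                             random_seed=1)

print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

If p falls below the chosen significance level (for example 0.05), the performance difference between the two models is considered significant. Note that with the default shuffle=False the folds are deterministic, so random_seed is ignored in that case.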