
Commit

paired_ttest_kfold_cv
rasbt committed Jan 19, 2018
1 parent a33b294 commit 845e07c
Showing 3 changed files with 218 additions and 1 deletion.
4 changes: 3 additions & 1 deletion mlxtend/evaluate/__init__.py
@@ -16,11 +16,13 @@
from .permutation import permutation_test
from .cochrans_q import cochrans_q
from .ttest import paired_ttest_resampled
from .ttest import paired_ttest_kfold_cv


__all__ = ["scoring", "confusion_matrix",
           "mcnemar_table", "mcnemar_tables",
           "mcnemar", "lift_score",
           "bootstrap", "permutation_test",
           "BootstrapOutOfBag", "bootstrap_point632_score",
           "cochrans_q", "paired_ttest_resampled",
           "paired_ttest_kfold_cv"]
114 changes: 114 additions & 0 deletions mlxtend/evaluate/tests/test_paired_ttest_kfold.py
@@ -0,0 +1,114 @@
# Sebastian Raschka 2014-2018
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

from mlxtend.evaluate import paired_ttest_kfold_cv
from mlxtend.data import iris_data
from mlxtend.data import boston_housing_data
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split


def test_classifier_defaults():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 random_seed=1)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    # change max_depth of the decision tree classifier

    clf2 = DecisionTreeClassifier(max_depth=1, random_state=1)

    score3 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score3, 2) == 0.63

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 random_seed=1)

    assert round(t, 3) == 13.491, t
    assert round(p, 3) == 0.000, p


def test_scoring():
    X, y = iris_data()
    clf1 = LogisticRegression(random_state=1)
    clf2 = DecisionTreeClassifier(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = clf1.fit(X_train, y_train).score(X_test, y_test)
    score2 = clf2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.97
    assert round(score2, 2) == 0.95

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 scoring='accuracy',
                                 random_seed=1)

    assert round(t, 3) == -1.861, t
    assert round(p, 3) == 0.096, p

    t, p = paired_ttest_kfold_cv(estimator1=clf1,
                                 estimator2=clf2,
                                 X=X, y=y,
                                 scoring='f1_macro',
                                 random_seed=1)

    assert round(t, 3) == -1.872, t
    assert round(p, 3) == 0.094, p


def test_regressor():
    X, y = boston_housing_data()
    reg1 = Lasso(random_state=1)
    reg2 = Ridge(random_state=1)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.25,
                         random_state=123)

    score1 = reg1.fit(X_train, y_train).score(X_test, y_test)
    score2 = reg2.fit(X_train, y_train).score(X_test, y_test)

    assert round(score1, 2) == 0.66, score1
    assert round(score2, 2) == 0.68, score2

    t, p = paired_ttest_kfold_cv(estimator1=reg1,
                                 estimator2=reg2,
                                 X=X, y=y,
                                 random_seed=1)

    assert round(t, 3) == -0.549, t
    assert round(p, 3) == 0.596, p
101 changes: 101 additions & 0 deletions mlxtend/evaluate/ttest.py
@@ -8,6 +8,7 @@
import numpy as np
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import get_scorer


@@ -115,3 +116,103 @@ def paired_ttest_resampled(estimator1, estimator2, X, y,

    pvalue = stats.t.sf(np.abs(t_stat), num_rounds - 1)*2.
    return float(t_stat), float(pvalue)


def paired_ttest_kfold_cv(estimator1, estimator2, X, y,
                          cv=10,
                          scoring=None,
                          shuffle=False,
                          random_seed=None):
    """
    Implements the k-fold paired t-test procedure
    to compare the performance of two models.

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor
    estimator2 : scikit-learn classifier or regressor
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape = [n_samples]
        Target values.
    cv : int (default: 10)
        Number of splits (folds) for the
        cross-validation procedure.
    scoring : str, callable, or None (default: None)
        If None (default), uses 'accuracy' for sklearn classifiers
        and 'r2' for sklearn regressors.
        If str, uses a sklearn scoring metric string identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        If a callable object or function is provided, it has to conform to
        sklearn's signature ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.
    shuffle : bool (default: False)
        Whether to shuffle the dataset for generating
        the k-fold splits.
    random_seed : int or None (default: None)
        Random seed for shuffling the dataset
        for generating the k-fold splits.
        Ignored if shuffle=False.

    Returns
    ----------
    t : float
        The t-statistic
    pvalue : float
        Two-tailed p-value.
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.
    """

    kf = KFold(n_splits=cv, random_state=random_seed, shuffle=shuffle)

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    score_diff = []

    # refit both estimators on each training fold and record the
    # difference of their test-fold scores
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        estimator1.fit(X_train, y_train)
        estimator2.fit(X_train, y_train)

        est1_score = scorer(estimator1, X_test, y_test)
        est2_score = scorer(estimator2, X_test, y_test)
        score_diff.append(est1_score - est2_score)

    avg_diff = np.mean(score_diff)

    # t-statistic of the k-fold cross-validated paired t-test
    numerator = avg_diff * np.sqrt(cv)
    denominator = np.sqrt(sum([(diff - avg_diff)**2 for diff in score_diff])
                          / (cv - 1))
    t_stat = numerator / denominator

    # two-tailed p-value from a t-distribution with cv - 1 degrees of freedom
    pvalue = stats.t.sf(np.abs(t_stat), cv - 1)*2.
    return float(t_stat), float(pvalue)
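
For reference, the statistic computed above is the k-fold cross-validated paired t-test (see, e.g., Dietterich, 1998). A sketch of the math the code implements, with d_i denoting the score difference observed on the i-th of the k = cv test folds:

    \bar{d} = \frac{1}{k} \sum_{i=1}^{k} d_i,
    \qquad
    t = \frac{\bar{d}\,\sqrt{k}}{\sqrt{\frac{1}{k-1} \sum_{i=1}^{k} \left(d_i - \bar{d}\right)^2}}

Under the null hypothesis that both models perform equally well, t approximately follows a t-distribution with k - 1 degrees of freedom, which is where the two-tailed p-value in the last line comes from.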

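As a quick illustration of the new API, here is a minimal usage sketch (illustration only, not part of the commit; it assumes mlxtend at this commit and scikit-learn are installed):

# Minimal usage sketch for paired_ttest_kfold_cv (illustration only,
# not part of this commit).
from mlxtend.data import iris_data
from mlxtend.evaluate import paired_ttest_kfold_cv
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = iris_data()
clf1 = LogisticRegression(random_state=1)
clf2 = DecisionTreeClassifier(random_state=1)

# 10-fold cross-validated paired t-test on accuracy; random_seed
# takes effect here only because shuffle=True
t, p = paired_ttest_kfold_cv(estimator1=clf1,
                             estimator2=clf2,
                             X=X, y=y,
                             cv=10,
                             scoring='accuracy',
                             shuffle=True,
                             random_seed=1)

print('t statistic: %.3f' % t)
print('p value: %.3f' % p)

If p falls below the chosen significance level (for example 0.05), the performance difference between the two models is considered significant. Note that with the default shuffle=False the folds are deterministic, so random_seed is ignored in that case.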