Merge 9a5fce0 into 9836dfb

yeatmanlab · Feb 16, 2022 · f47cb8a
2 parents 9836dfb + 9a5fce0
commit f47cb8a
Show file tree

Hide file tree

Showing 5 changed files with 71 additions and 64 deletions.
diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py
@@ -238,7 +238,7 @@ class SerialBaggingClassifier(BaggingClassifier):
     base_estimator_ : estimator
         The base estimator from which the ensemble is grown.
 
-    n_features_ : int
+    n_features_in_ : int
         The number of features when :meth:`fit` is performed.
 
     estimators_ : list of estimators
@@ -357,7 +357,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
             sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
 
         # Remap output
-        n_samples, self.n_features_ = X.shape
+        n_samples, self.n_features_in_ = X.shape
         self._n_samples = n_samples
         y = self._validate_y(y)
         self.y_train_ = np.copy(y)
@@ -384,11 +384,11 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         if isinstance(self.max_features, numbers.Integral):
             max_features = self.max_features
         elif isinstance(self.max_features, np.float):
-            max_features = self.max_features * self.n_features_
+            max_features = self.max_features * self.n_features_in_
         else:
             raise ValueError("max_features must be int or float")
 
-        if not (0 < max_features <= self.n_features_):
+        if not (0 < max_features <= self.n_features_in_):
             raise ValueError("max_features must be in (0, n_features]")
 
         max_features = max(1, int(max_features))
@@ -482,7 +482,7 @@ def _get_estimators_indices(self):
                 seed,
                 self.bootstrap_features,
                 self.bootstrap,
-                self.n_features_,
+                self.n_features_in_,
                 self._n_samples,
                 self._max_features,
                 self._max_samples,
@@ -519,12 +519,12 @@ def predict_proba(self, X):
             X, accept_sparse=["csr", "csc"], dtype=None, force_all_finite=False
         )
 
-        if self.n_features_ != X.shape[1]:
+        if self.n_features_in_ != X.shape[1]:
             raise ValueError(
                 "Number of features of the model must "
                 "match the input. Model n_features is {0} and "
                 "input n_features is {1}."
-                "".format(self.n_features_, X.shape[1])
+                "".format(self.n_features_in_, X.shape[1])
             )
 
         # Partition the estimators
@@ -573,12 +573,12 @@ def predict_log_proba(self, X):
                 X, accept_sparse=["csr", "csc"], dtype=None, force_all_finite=False
             )
 
-            if self.n_features_ != X.shape[1]:
+            if self.n_features_in_ != X.shape[1]:
                 raise ValueError(
                     "Number of features of the model must "
                     "match the input. Model n_features is {0} "
                     "and input n_features is {1} "
-                    "".format(self.n_features_, X.shape[1])
+                    "".format(self.n_features_in_, X.shape[1])
                 )
 
             # Partition the estimators
@@ -635,12 +635,12 @@ def decision_function(self, X):
             X, accept_sparse=["csr", "csc"], dtype=None, force_all_finite=False
         )
 
-        if self.n_features_ != X.shape[1]:
+        if self.n_features_in_ != X.shape[1]:
             raise ValueError(
                 "Number of features of the model must "
                 "match the input. Model n_features is {0} and "
                 "input n_features is {1} "
-                "".format(self.n_features_, X.shape[1])
+                "".format(self.n_features_in_, X.shape[1])
             )
 
         # Partition the estimators
@@ -747,7 +747,7 @@ class SerialBaggingRegressor(BaggingRegressor):
     base_estimator_ : estimator
         The base estimator from which the ensemble is grown.
 
-    n_features_ : int
+    n_features_in_ : int
         The number of features when :meth:`fit` is performed.
 
     estimators_ : list of estimators
@@ -872,7 +872,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
             sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
 
         # Remap output
-        n_samples, self.n_features_ = X.shape
+        n_samples, self.n_features_in_ = X.shape
         self._n_samples = n_samples
         y = self._validate_y(y)
 
@@ -898,11 +898,11 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         if isinstance(self.max_features, numbers.Integral):
             max_features = self.max_features
         elif isinstance(self.max_features, np.float):  # pragma: no cover
-            max_features = self.max_features * self.n_features_
+            max_features = self.max_features * self.n_features_in_
         else:  # pragma: no cover
             raise ValueError("max_features must be int or float")
 
-        if not (0 < max_features <= self.n_features_):  # pragma: no cover
+        if not (0 < max_features <= self.n_features_in_):  # pragma: no cover
             raise ValueError("max_features must be in (0, n_features]")
 
         max_features = max(1, int(max_features))

diff --git a/afqinsight/cnn.py b/afqinsight/cnn.py
@@ -15,7 +15,7 @@
     "To use afqinsight's convolutional neural nets for tractometry data, you will need "
     "to have tensorflow and kerastuner installed. You can do this by installing "
     "afqinsight with `pip install afqinsight[tf]`, or by separately installing these packages "
-    "with `pip install tensorflow kerastuner`."
+    "with `pip install tensorflow keras-tuner`."
 )
 
 kt, _, _ = optional_package("keras_tuner", keras_msg)

diff --git a/afqinsight/cross_validate.py b/afqinsight/cross_validate.py
@@ -9,9 +9,14 @@
 
 from joblib import delayed, Parallel
 from sklearn.base import clone, is_classifier
+from sklearn.metrics import check_scoring
 from sklearn.metrics._scorer import _check_multimetric_scoring
 from sklearn.model_selection._split import check_cv
-from sklearn.model_selection._validation import _aggregate_score_dicts, _fit_and_score
+from sklearn.model_selection._validation import (
+    _aggregate_score_dicts,
+    _fit_and_score,
+    _normalize_score_results,
+)
 from sklearn.pipeline import Pipeline
 from sklearn.utils import indexable
 
@@ -125,18 +130,18 @@ def _fit_and_score_ckpt(
             with open(pkl_file, "rb") as fp:
                 estimator = pickle.load(fp)
 
-            scores.append(estimator)
+            scores["estimator"] = estimator
 
         return scores
     else:
         scores = _fit_and_score(estimator, **fit_and_score_kwargs)
         os.makedirs(workdir, exist_ok=True)
         if fit_and_score_kwargs.get("return_estimator", False):
-            estimator = scores[-1]
+            estimator = scores["estimator"]
             with open(pkl_file, "wb") as fp:
                 pickle.dump(estimator, fp)
 
-            ckpt_scores = scores[:-1]
+            ckpt_scores = {key: scores[key] for key in scores if key != "estimator"}
             if isinstance(estimator, Pipeline):
                 model = estimator.steps[-1]
             else:
@@ -161,7 +166,7 @@ def _fit_and_score_ckpt(
         fit_and_score_kwargs.pop("y")
 
         if "scorer" in fit_and_score_kwargs:
-            fit_and_score_kwargs["scorer"] = list(fit_and_score_kwargs["scorer"].keys())
+            fit_and_score_kwargs.pop("scorer")
 
         ckpt_dict = {
             "scores": ckpt_scores,
@@ -392,7 +397,13 @@ def cross_validate_checkpoint(
     X, y, groups = indexable(X, y, groups)
 
     cv = check_cv(cv, y, classifier=is_classifier(estimator))
-    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)
+
+    if callable(scoring):
+        scorers = scoring
+    elif scoring is None or isinstance(scoring, str):
+        scorers = check_scoring(estimator, scoring)
+    else:
+        scorers = _check_multimetric_scoring(estimator, scoring)
 
     # We clone the estimator to make sure that all the folds are
     # independent, and that it is pickle-able.
@@ -442,26 +453,23 @@ def cross_validate_checkpoint(
             for train, test in cv.split(X, y, groups)
         )
 
-    zipped_scores = list(zip(*scores))
-    if return_train_score:
-        train_scores = zipped_scores.pop(0)
-        train_scores = _aggregate_score_dicts(train_scores)
-    if return_estimator:
-        fitted_estimators = zipped_scores.pop()
-    test_scores, fit_times, score_times = zipped_scores
-    test_scores = _aggregate_score_dicts(test_scores)
+    results = _aggregate_score_dicts(scores)
 
     ret = {}
-    ret["fit_time"] = np.array(fit_times)
-    ret["score_time"] = np.array(score_times)
+    ret["fit_time"] = results["fit_time"]
+    ret["score_time"] = results["score_time"]
 
     if return_estimator:
-        ret["estimator"] = fitted_estimators
+        ret["estimator"] = results["estimator"]
+
+    test_scores_dict = _normalize_score_results(results["test_scores"])
+    if return_train_score:
+        train_scores_dict = _normalize_score_results(results["train_scores"])
 
-    for name in scorers:
-        ret["test_%s" % name] = np.array(test_scores[name])
+    for name in test_scores_dict:
+        ret["test_%s" % name] = test_scores_dict[name]
         if return_train_score:
             key = "train_%s" % name
-            ret[key] = np.array(train_scores[name])
+            ret[key] = train_scores_dict[name]
 
     return ret
diff --git a/setup.cfg b/setup.cfg
@@ -33,13 +33,13 @@ setup_requires =
 python_requires = >=3.6
 install_requires =
     dipy>=1.0.0
-    groupyr>=0.2.5
+    groupyr>=0.2.6
     matplotlib
     numpy
     pandas>=1.1.0
     requests
     seaborn
-    scikit-learn>=0.23.1,<0.24
+    scikit-learn>=1.0.0
     sklearn_pandas>=2.0.0
     tables>=3.0.0
     tqdm
@@ -54,24 +54,24 @@ tf =
     tensorflow
     keras-tuner
 dev =
-    black>=21.12b0
+    black
+    flake8
+    matplotlib
+    numpydoc
+    pre-commit
+    pydocstyle
+    pytest-cov
+    pytest-xdist[psutil]
+    pytest
+    scipy<=1.7.3
+    sphinx
+    sphinx-gallery
+    sphinx-panels
+    sphinx-rtd-theme
+    tox
     typing-extensions>=3.10.0.0
-    flake8>=3.8.3
-    numpydoc>=1.1.0
-    matplotlib>=3.3.0
-    pre-commit>=2.9.2
-    pydocstyle>=5.1.1
-    pytest-cov>=2.10.1
-    pytest-xdist[psutil]>=2.1.0
-    pytest>=6.0.1
-    scipy>=1.2.0,<1.6.0
-    sphinx>=3.2.1
-    sphinx-gallery>=0.8.1
-    sphinx-panels>=0.5.2
-    sphinx-rtd-theme>=0.5.0
-    tox>=1.8.0
 maint =
-    rapidfuzz==0.12.2
+    rapidfuzz
 
 all =
     %(torch)s

diff --git a/tox.ini b/tox.ini
@@ -1,29 +1,28 @@
 [tox]
 minversion = 1.8.0
-envlist = sklearn{231,232}
+envlist = afqinsight
 isolated_build = True
 
 [testenv]
 deps =
-    setuptools_scm
-    pytest-cov>=2.10.1
-    pytest-xdist[psutil]==2.1.0
-    pytest>=6.0.1
     dipy>=1.0.0
-    groupyr==0.2.5
+    groupyr==0.2.6
     h5py>=3.0.0
+    keras-tuner
     matplotlib
     numpy
     pandas>=1.1.0
+    pytest
+    pytest-cov
+    pytest-xdist[psutil]
     requests
+    scikit-learn>=1.0.0
+    scipy<=1.7.3
     seaborn
-    scipy>=1.2.0,<1.6.0
+    setuptools_scm
     sklearn_pandas>=2.0.0
     tables>=3.0.0
     tensorflow
-    keras-tuner
     torch
     tqdm
-    sklearn231: scikit-learn==0.23.1
-    sklearn232: scikit-learn==0.23.2
 commands = pytest --pyargs afqinsight --cov-report term-missing --cov-config .coveragerc --cov=afqinsight -n auto