STY: Run black at the top of the repo #932

Merged
merged 1 commit on Sep 20, 2021
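All of the hunks below are mechanical re-wraps produced by the formatter: lines longer than black's default 88-character limit are split by introducing parentheses, with no change in runtime behavior. As a minimal sketch (assuming the black package is installed; the exact black version and configuration used for this PR are not shown on this page), the first hunk can be reproduced programmatically:

import black

# Original single-line assignment from mriqc/__about__.py; it exceeds black's
# default 88-character line length.
src = (
    '__copyright__ = "Copyright 2020, Center for Reproducible Neuroscience, '
    'Stanford University"\n'
)

# black keeps the string intact and wraps the right-hand side in parentheses.
print(black.format_str(src, mode=black.Mode()))
# Expected output (matches the first hunk below):
# __copyright__ = (
#     "Copyright 2020, Center for Reproducible Neuroscience, Stanford University"
# )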
4 changes: 3 additions & 1 deletion mriqc/__about__.py
@@ -4,7 +4,9 @@
__version__ = get_versions()["version"]
del get_versions

__copyright__ = "Copyright 2020, Center for Reproducible Neuroscience, Stanford University"
__copyright__ = (
"Copyright 2020, Center for Reproducible Neuroscience, Stanford University"
)
__credits__ = "Oscar Esteban"
__download__ = f"https://github.com/poldracklab/mriqc/archive/{__version__}.tar.gz"
__all__ = ["__version__", "__copyright__", "__credits__", "__download__"]
8 changes: 6 additions & 2 deletions mriqc/bin/abide2bids.py
@@ -25,8 +25,12 @@ def main():
)
g_input = parser.add_argument_group("Inputs")
g_input.add_argument("-i", "--input-abide-catalog", action="store", required=True)
g_input.add_argument("-n", "--dataset-name", action="store", default="ABIDE Dataset")
g_input.add_argument("-u", "--nitrc-user", action="store", default=os.getenv("NITRC_USER"))
g_input.add_argument(
"-n", "--dataset-name", action="store", default="ABIDE Dataset"
)
g_input.add_argument(
"-u", "--nitrc-user", action="store", default=os.getenv("NITRC_USER")
)
g_input.add_argument(
"-p",
"--nitrc-password",
4 changes: 3 additions & 1 deletion mriqc/bin/dfcheck.py
@@ -72,7 +72,9 @@ def main():
tst_keep = np.sum(tst_rows.isin(ref_rows).values.ravel().tolist())
print(tst_keep)

diff = ~np.isclose(ref_df[ref_names].values, tst_df[tst_names].values, rtol=opts.tolerance)
diff = ~np.isclose(
ref_df[ref_names].values, tst_df[tst_names].values, rtol=opts.tolerance
)
if np.any(diff):
# ne_stacked = pd.DataFrame(data=diff, columns=ref_names).stack()
# ne_stacked = np.isclose(ref_df[ref_names], tst_df[ref_names]).stack()
27 changes: 21 additions & 6 deletions mriqc/bin/fs2gif.py
@@ -83,7 +83,9 @@ def main():

niifile = op.join(tmp_sub, "%s.nii.gz") % subid
ref_file = op.join(sub_path, "mri", "T1.mgz")
sp.call(["mri_convert", op.join(sub_path, "mri", "norm.mgz"), niifile], cwd=tmp_sub)
sp.call(
["mri_convert", op.join(sub_path, "mri", "norm.mgz"), niifile], cwd=tmp_sub
)
data = nb.load(niifile).get_data()
data[data > 0] = 1

@@ -112,7 +114,10 @@ def main():
)
tclfp.write(" SetSlice $slice\n")
tclfp.write(" RedrawScreen\n")
tclfp.write(' SaveTIFF [format "%s/%s-' % (tmp_sub, subid) + '%03d.tif" $i]\n')
tclfp.write(
' SaveTIFF [format "%s/%s-' % (tmp_sub, subid)
+ '%03d.tif" $i]\n'
)
tclfp.write(" incr i\n")
tclfp.write("}\n")
tclfp.write("QuitMedit\n")
@@ -155,11 +160,16 @@ def main():
"for { set slice %d } { $slice < %d } { incr slice } {"
% (bbox_min[2], bbox_max[2])
)
tclfp.write(" SetZoomCenter %d %d $slice\n" % (center[0] + 30, center[1] - 10))
tclfp.write(
" SetZoomCenter %d %d $slice\n"
% (center[0] + 30, center[1] - 10)
)
tclfp.write(" SetSlice $slice\n")
tclfp.write(" RedrawScreen\n")
tclfp.write(
' SaveTIFF [format "{}/{}-lh-%03d.tif" $i]\n'.format(tmp_sub, subid)
' SaveTIFF [format "{}/{}-lh-%03d.tif" $i]\n'.format(
tmp_sub, subid
)
)
tclfp.write(" incr i\n")
tclfp.write("}\n")
@@ -182,11 +192,16 @@ def main():
"for { set slice %d } { $slice < %d } { incr slice } {"
% (bbox_min[2], bbox_max[2])
)
tclfp.write(" SetZoomCenter %d %d $slice\n" % (center[0] - 30, center[1] - 10))
tclfp.write(
" SetZoomCenter %d %d $slice\n"
% (center[0] - 30, center[1] - 10)
)
tclfp.write(" SetSlice $slice\n")
tclfp.write(" RedrawScreen\n")
tclfp.write(
' SaveTIFF [format "{}/{}-rh-%03d.tif" $slice]\n'.format(tmp_sub, subid)
' SaveTIFF [format "{}/{}-rh-%03d.tif" $slice]\n'.format(
tmp_sub, subid
)
)
tclfp.write(" incr i\n")
tclfp.write("}\n")
20 changes: 13 additions & 7 deletions mriqc/bin/messages.py
@@ -1,18 +1,24 @@
ABIDE_SUBJECT_FETCHED = "Successfully processed subject {subject_id} from site {site_name}"
ABIDE_SUBJECT_FETCHED = (
"Successfully processed subject {subject_id} from site {site_name}"
)
ABIDE_TEMPORAL_WARNING = "WARNING: Error deleting temporal files: {message}"
BIDS_LABEL_MISSING = "Participant label(s) not found in the BIDS root directory: {label}"
BIDS_GROUP_SIZE = "Group size should be at least 0 (i.e. all participants assigned to same group)."
BIDS_LABEL_MISSING = (
"Participant label(s) not found in the BIDS root directory: {label}"
)
BIDS_GROUP_SIZE = (
"Group size should be at least 0 (i.e. all participants assigned to same group)."
)
CLF_CAPTURED_WARNING = "Captured warning ({category}): {message}"
CLF_CLASSIFIER_MISSING = "No training samples were given, and the --load-classifier option {info}."
CLF_CLASSIFIER_MISSING = (
"No training samples were given, and the --load-classifier option {info}."
)
CLF_SAVED_RESULTS = "Results saved as {path}."
CLF_TRAIN_LOAD_ERROR = "Errors ({n_errors}) loading training set: {errors}."
CLF_WRONG_PARAMETER_COUNT = "Wrong number of parameters."
DFCHECK_CSV_CHANGED = "Output CSV file changed one or more values."
DFCHECK_CSV_COLUMNS = "Output CSV file changed number of columns."
DFCHECK_DIFFERENT_BITS = "Dataset has different BIDS bits w.r.t. reference."
DFCHECK_DIFFERENT_LENGTH = (
"Input datases have different lengths (input={len_input}, reference={len_reference})."
)
DFCHECK_DIFFERENT_LENGTH = "Input datases have different lengths (input={len_input}, reference={len_reference})."
DFCHECK_IQMS_CORRELATED = "All IQMs show a Pearson correlation >= 0.95."
DFCHECK_IQMS_UNDER_095 = "IQMs with Pearson correlation < 0.95:\n{iqms}"
HASH_REPORT = "{sha} {file_name}"
27 changes: 21 additions & 6 deletions mriqc/bin/mriqc_clf.py
@@ -37,7 +37,12 @@


def warn_redirect(
message: str, category: str, filename: str, lineno: int, file: str = None, line: str = None
message: str,
category: str,
filename: str,
lineno: int,
file: str = None,
line: str = None,
) -> None:
"""
Caches a list of raised warning categories.
@@ -59,7 +64,9 @@ def warn_redirect(
"""
if category not in cached_warnings:
# Log captured warning
debug_message = messages.CLF_CAPTURED_WARNING.format(category=category, message=message)
debug_message = messages.CLF_CAPTURED_WARNING.format(
category=category, message=message
)
LOGGER.debug(debug_message)
# Add category to cache
cached_warnings.append(category)
@@ -88,7 +95,9 @@ def get_parser() -> ArgumentParser:
parser.add_argument(
"--test", nargs="*", help="test data tables, X and Y, leave empty for DS030"
)
parser.add_argument("-X", "--evaluation-data", help="classify this CSV table of IQMs")
parser.add_argument(
"-X", "--evaluation-data", help="classify this CSV table of IQMs"
)

parser.add_argument(
"--train-balanced-leaveout",
@@ -160,7 +169,9 @@ def get_parser() -> ArgumentParser:
default=0,
help="increases log verbosity for each occurence.",
)
g_input.add_argument("--njobs", action="store", default=-1, type=int, help="number of jobs")
g_input.add_argument(
"--njobs", action="store", default=-1, type=int, help="number of jobs"
)

g_input.add_argument(
"-t",
@@ -280,15 +291,19 @@ def main():
# Set held-out data
cv_helper.setXtest(test_path[0], test_path[1])
# Evaluate
cv_helper.evaluate(matrix=True, scoring=[opts.scorer, "accuracy"], save_pred=True)
cv_helper.evaluate(
matrix=True, scoring=[opts.scorer, "accuracy"], save_pred=True
)

# Pickle if required
if not clf_loaded:
cv_helper.fit_full()
cv_helper.save(suffix="data-all_estimator")

if opts.evaluation_data:
cv_helper.predict_dataset(opts.evaluation_data, save_pred=True, thres=opts.threshold)
cv_helper.predict_dataset(
opts.evaluation_data, save_pred=True, thres=opts.threshold
)

results_path = abspath(cv_helper._base_name + "*")
saved_results_message = messages.CLF_SAVED_RESULTS.format(path=results_path)
4 changes: 3 additions & 1 deletion mriqc/bin/subject_wrangler.py
@@ -105,7 +105,9 @@ def main():

if list(set(subject_list) - set(all_subjects)):
non_exist = list(set(subject_list) - set(all_subjects))
missing_label_error = messages.BIDS_LABEL_MISSING.format(label=" ".join(non_exist))
missing_label_error = messages.BIDS_LABEL_MISSING.format(
label=" ".join(non_exist)
)
raise RuntimeError(missing_label_error)

if not opts.no_randomize:
19 changes: 15 additions & 4 deletions mriqc/classifier/data.py
@@ -8,7 +8,12 @@
import numpy as np
import pandas as pd
from mriqc import config
from mriqc.messages import CREATED_DATASET, DROPPING_NON_NUMERICAL, POST_Z_NANS, Z_SCORING
from mriqc.messages import (
CREATED_DATASET,
DROPPING_NON_NUMERICAL,
POST_Z_NANS,
Z_SCORING,
)
from mriqc.utils.misc import BIDS_COMP


@@ -62,7 +67,9 @@ def read_iqms(feat_file):

if feat_file.suffix == ".csv":
bids_comps = list(BIDS_COMP.keys())
x_df = pd.read_csv(feat_file, index_col=False, dtype={col: str for col in bids_comps})
x_df = pd.read_csv(
feat_file, index_col=False, dtype={col: str for col in bids_comps}
)
# Find present bids bits and sort by them
bids_comps_present = list(set(x_df.columns.ravel().tolist()) & set(bids_comps))
bids_comps_present = [bit for bit in bids_comps if bit in bids_comps_present]
@@ -79,7 +86,9 @@ def read_iqms(feat_file):
pass
else:
bids_comps_present = ["subject_id"]
x_df = pd.read_csv(feat_file, index_col=False, sep="\t", dtype={"bids_name": str})
x_df = pd.read_csv(
feat_file, index_col=False, sep="\t", dtype={"bids_name": str}
)
x_df = x_df.sort_values(by=["bids_name"])
x_df["subject_id"] = x_df.bids_name.str.lstrip("sub-")
x_df = x_df.drop(columns=["bids_name"])
@@ -111,7 +120,9 @@ def read_labels(
output_labels = rate_label

bids_comps = list(BIDS_COMP.keys())
y_df = pd.read_csv(label_file, index_col=False, dtype={col: str for col in bids_comps})
y_df = pd.read_csv(
label_file, index_col=False, dtype={col: str for col in bids_comps}
)

# Find present bids bits and sort by them
bids_comps_present = get_bids_cols(y_df)
8 changes: 6 additions & 2 deletions mriqc/classifier/helper.py
@@ -576,7 +576,9 @@ def evaluate(self, scoring=None, matrix=False, save_roc=False, save_pred=False):
)

score = scores[scoring.index("accuracy")]
pvalue = (np.sum(permutation_scores >= score) + 1.0) / (self._permutation_test + 1)
pvalue = (np.sum(permutation_scores >= score) + 1.0) / (
self._permutation_test + 1
)
LOG.info(
"Permutation test (N=%d) for accuracy score %f (pvalue=%f)",
self._permutation_test,
@@ -660,7 +662,9 @@ def _save_pred_table(self, sample, prob_y, pred_y, suffix):
predf["prob_y"] = prob_y[:, 1]
predf["pred_y"] = pred_y

predf[bidts + cols].to_csv(self._gen_fname(suffix=suffix, ext="csv"), index=False)
predf[bidts + cols].to_csv(
self._gen_fname(suffix=suffix, ext="csv"), index=False
)

def save(self, suffix="estimator", compress=3):
"""
7 changes: 5 additions & 2 deletions mriqc/classifier/sklearn/_validation.py
@@ -90,7 +90,9 @@ def _fit_and_score(

# Adjust length of sample weights
fit_params = fit_params if fit_params is not None else {}
fit_params = dict([(k, _index_param_value(X, v, train)) for k, v in fit_params.items()])
fit_params = dict(
[(k, _index_param_value(X, v, train)) for k, v in fit_params.items()]
)

if parameters is not None:
estimator.set_params(**parameters)
@@ -169,7 +171,8 @@ def _score(estimator, X_test, y_test, scorer):
pass
if not isinstance(score, numbers.Number):
raise ValueError(
"scoring must return a number, got %s (%s) instead." % (str(score), type(score))
"scoring must return a number, got %s (%s) instead."
% (str(score), type(score))
)
return score

25 changes: 16 additions & 9 deletions mriqc/classifier/sklearn/cv_nested.py
@@ -103,7 +103,9 @@ def _fit(self, X, y, groups, parameter_iterable):
pre_dispatch = self.pre_dispatch

cv_iter = list(cv.split(X, y, groups))
out = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch)(
out = Parallel(
n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch
)(
delayed(_model_fit_and_score)(
estimator,
X,
@@ -135,7 +137,9 @@ def _fit(self, X, y, groups, parameter_iterable):
parameters,
) = zip(*out)
else:
(test_scores, test_sample_counts, fit_time, score_time, parameters) = zip(*out)
(test_scores, test_sample_counts, fit_time, score_time, parameters) = zip(
*out
)

candidate_params = parameters[::n_splits]
n_candidates = len(candidate_params)
@@ -153,7 +157,9 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
results["mean_%s" % key_name] = array_means
# Weighted std is not directly available in numpy
array_stds = np.sqrt(
np.average((array - array_means[:, np.newaxis]) ** 2, axis=1, weights=weights)
np.average(
(array - array_means[:, np.newaxis]) ** 2, axis=1, weights=weights
)
)
results["std_%s" % key_name] = array_stds

@@ -251,8 +257,7 @@ def _model_fit_and_score(

# Adjust length of sample weights
fit_params = fit_params if fit_params is not None else {}
fit_params = {k: _index_param_value(X, v, train)
for k, v in fit_params.items()}
fit_params = {k: _index_param_value(X, v, train) for k, v in fit_params.items()}

if parameters is not None:
estimator.set_params(**parameters)
@@ -336,8 +341,7 @@ def nested_fit_and_score(

# Adjust length of sample weights
fit_params = fit_params if fit_params is not None else {}
fit_params = {k: _index_param_value(X, v, train)
for k, v in fit_params.items()}
fit_params = {k: _index_param_value(X, v, train) for k, v in fit_params.items()}

if parameters is not None:
estimator.set_params(**parameters)
@@ -398,13 +402,16 @@ def nested_fit_and_score(
score_time = time.time() - start_time - fit_time
else:
LOG.warning(
"Test set has no positive labels, scoring has been skipped " "in this loop."
"Test set has no positive labels, scoring has been skipped "
"in this loop."
)

if return_train_score:
train_score = _score(estimator, X_train, y_train, scorer)

acc_score = _score(estimator, X_test, y_test, check_scoring(estimator, scoring="accuracy"))
acc_score = _score(
estimator, X_test, y_test, check_scoring(estimator, scoring="accuracy")
)

if verbose > 0:
total_time = score_time + fit_time
4 changes: 3 additions & 1 deletion mriqc/classifier/sklearn/parameters.py
@@ -94,7 +94,9 @@ def __iter__(self):
def __len__(self):
"""Number of points on the grid."""
# Product function that can handle iterables (np.product can't).
return sum(_len(points) for p in self.param_grid for estim, points in list(p.items()))
return sum(
_len(points) for p in self.param_grid for estim, points in list(p.items())
)

def __getitem__(self, ind):
"""Get the parameters that would be ``ind``th in iteration