From 956e1f476103c045f6e3e7fef393bac36cf0d9f2 Mon Sep 17 00:00:00 2001
From: "John T. Wodder II" <git@varonathe.org>
Date: Mon, 10 Apr 2023 12:27:25 -0400
Subject: [PATCH] Write command arguments as lists of strings instead of
 splitting strings on whitespace

---
 heudiconv/cli/monitor.py           | 16 +++----
 heudiconv/tests/test_convert.py    | 17 +++++--
 heudiconv/tests/test_heuristics.py | 74 +++++++++++++++++++-----------
 heudiconv/tests/test_main.py       | 13 ++++--
 heudiconv/tests/test_queue.py      | 13 +++---
 5 files changed, 83 insertions(+), 50 deletions(-)

diff --git a/heudiconv/cli/monitor.py b/heudiconv/cli/monitor.py
index c9f8925a..144fcae9 100644
--- a/heudiconv/cli/monitor.py
+++ b/heudiconv/cli/monitor.py
@@ -6,6 +6,7 @@
 import os.path as op
 from pathlib import Path
 import re
+import shlex
 import subprocess
 import time
 
@@ -29,17 +30,16 @@
 _LOGGER.addHandler(ch)
 
 
-def run_heudiconv(cmd):
+def run_heudiconv(args):
     info_dict = dict()
-    proc = subprocess.Popen(
-        cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT
-    )
+    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    cmd = " ".join(map(shlex.quote, args))
     return_code = proc.wait()
     if return_code == 0:
-        _LOGGER.info("Done running {0}".format(cmd))
+        _LOGGER.info("Done running %s", cmd)
         info_dict["success"] = 1
     else:
-        _LOGGER.error("{0} failed".format(cmd))
+        _LOGGER.error("%s failed", cmd)
         info_dict["success"] = 0
     # get info on what we run
     stdout = proc.communicate()[0].decode("utf-8")
@@ -50,7 +50,6 @@ def run_heudiconv(cmd):
 
 
 def process(paths2process, db, wait=WAIT_TIME, logdir="log"):
-    cmd = "ls -l {0}"
     # if paths2process and
     # time.time() - os.path.getmtime(paths2process[0]) > WAIT_TIME:
     processed = []
@@ -58,13 +57,12 @@ def process(paths2process, db, wait=WAIT_TIME, logdir="log"):
         if time.time() - mod_time > wait:
             # process_me = paths2process.popleft().decode('utf-8')
             process_me = path
-            cmd_ = cmd.format(process_me)
             process_dict = {
                 "input_path": process_me,
                 "accession_number": op.basename(process_me),
             }
             print("Time to process {0}".format(process_me))
-            stdout, run_dict = run_heudiconv(cmd_)
+            stdout, run_dict = run_heudiconv(["ls", "-l", process_me])
             process_dict.update(run_dict)
             db.insert(process_dict)
             # save log
diff --git a/heudiconv/tests/test_convert.py b/heudiconv/tests/test_convert.py
index 459d9472..60dea79d 100644
--- a/heudiconv/tests/test_convert.py
+++ b/heudiconv/tests/test_convert.py
@@ -149,10 +149,19 @@ def test_b0dwi_for_fmap(tmpdir, caplog):
     caplog.set_level(logging.WARNING)
     tmppath = tmpdir.strpath
     subID = "b0dwiForFmap"
-    args = (
-        "-c dcm2niix -o %s -b -f test_b0dwi_for_fmap --files %s -s %s"
-        % (tmpdir, op.join(TESTS_DATA_PATH, "b0dwiForFmap"), subID)
-    ).split(" ")
+    args = [
+        "-c",
+        "dcm2niix",
+        "-o",
+        str(tmpdir),
+        "-b",
+        "-f",
+        "test_b0dwi_for_fmap",
+        "--files",
+        op.join(TESTS_DATA_PATH, "b0dwiForFmap"),
+        "-s",
+        subID,
+    ]
     runner(args)
 
     # assert that it raised a warning that the fmap directory will contain
diff --git a/heudiconv/tests/test_heuristics.py b/heudiconv/tests/test_heuristics.py
index 17cd9113..5c8fd988 100644
--- a/heudiconv/tests/test_heuristics.py
+++ b/heudiconv/tests/test_heuristics.py
@@ -29,10 +29,18 @@
 
 # this will fail if not in project's root directory
 def test_smoke_convertall(tmpdir):
-    args = (
-        "-c dcm2niix -o %s -b --datalad "
-        "-s fmap_acq-3mm -d %s/{subject}/*" % (tmpdir, TESTS_DATA_PATH)
-    ).split(" ")
+    args = [
+        "-c",
+        "dcm2niix",
+        "-o",
+        str(tmpdir),
+        "-b",
+        "--datalad",
+        "-s",
+        "fmap_acq-3mm",
+        "-d",
+        f"{TESTS_DATA_PATH}/{{subject}}/*",
+    ]
 
     # complain if no heurisitic
     with pytest.raises(RuntimeError):
@@ -46,23 +54,23 @@ def test_smoke_convertall(tmpdir):
 @pytest.mark.parametrize(
     "invocation",
     [
-        "--files %s" % TESTS_DATA_PATH,  # our new way with automated grouping
-        "-d %s/{subject}/* -s 01-fmap_acq-3mm"
-        % TESTS_DATA_PATH  # "old" way specifying subject
+        ["--files", TESTS_DATA_PATH],  # our new way with automated grouping
+        ["-d", f"{TESTS_DATA_PATH}/{{subject}}/*", "-s", "01-fmap_acq-3mm"],
+        # "old" way specifying subject
         # should produce the same results
     ],
 )
 @pytest.mark.skipif(Dataset is None, reason="no datalad")
 def test_reproin_largely_smoke(tmpdir, heuristic, invocation):
     is_bids = True if heuristic == "reproin" else False
-    arg = "--random-seed 1 -f %s -c dcm2niix -o %s" % (heuristic, tmpdir)
+    args = ["--random-seed", "1", "-f", heuristic, "-c", "dcm2niix", "-o", str(tmpdir)]
     if is_bids:
-        arg += " -b"
-    arg += " --datalad "
-    args = (arg + invocation).split(" ")
+        args.append("-b")
+    args.append("--datalad")
+    args.extend(invocation)
 
     # Test some safeguards
-    if invocation == "--files %s" % TESTS_DATA_PATH:
+    if invocation[0] == "--files":
         # Multiple subjects must not be specified -- only a single one could
         # be overridden from the command line
         with pytest.raises(ValueError):
@@ -109,13 +117,13 @@ def test_reproin_largely_smoke(tmpdir, heuristic, invocation):
 @pytest.mark.parametrize(
     "invocation",
     [
-        "--files %s" % TESTS_DATA_PATH,  # our new way with automated grouping
+        ["--files", TESTS_DATA_PATH],  # our new way with automated grouping
     ],
 )
 def test_scans_keys_reproin(tmpdir, invocation):
-    args = "-f reproin -c dcm2niix -o %s -b " % (tmpdir)
+    args = ["-f", "reproin", "-c", "dcm2niix", "-o", str(tmpdir), "-b"]
     args += invocation
-    runner(args.split())
+    runner(args)
     # for now check it exists
     scans_keys = glob(pjoin(tmpdir.strpath, "*/*/*/*/*/*.tsv"))
     assert len(scans_keys) == 1
@@ -134,7 +142,7 @@ def test_scans_keys_reproin(tmpdir, invocation):
 
 @patch("sys.stdout", new_callable=StringIO)
 def test_ls(stdout):
-    args = ("-f reproin --command ls --files %s" % (TESTS_DATA_PATH)).split(" ")
+    args = ["-f", "reproin", "--command", "ls", "--files", TESTS_DATA_PATH]
     runner(args)
     out = stdout.getvalue()
     assert "StudySessionInfo(locator=" in out
@@ -143,7 +151,7 @@ def test_ls(stdout):
 
 def test_scout_conversion(tmpdir):
     tmppath = tmpdir.strpath
-    args = ("-b -f reproin --files %s" % (TESTS_DATA_PATH)).split(" ") + ["-o", tmppath]
+    args = ["-b", "-f", "reproin", "--files", TESTS_DATA_PATH, "-o", tmppath]
     runner(args)
 
     dspath = Path(tmppath) / "Halchenko/Yarik/950_bids_test4"
@@ -174,12 +182,15 @@ def test_scout_conversion(tmpdir):
 )
 def test_notop(tmpdir, bidsoptions):
     tmppath = tmpdir.strpath
-    args = (
-        ("-f reproin --files %s" % (TESTS_DATA_PATH)).split(" ")
-        + ["-o", tmppath]
-        + ["-b"]
-        + bidsoptions
-    )
+    args = [
+        "-f",
+        "reproin",
+        "--files",
+        TESTS_DATA_PATH,
+        "-o",
+        tmppath,
+        "-b",
+    ] + bidsoptions
     runner(args)
 
     assert op.exists(pjoin(tmppath, "Halchenko/Yarik/950_bids_test4"))
@@ -201,10 +212,19 @@ def test_notop(tmpdir, bidsoptions):
 def test_phoenix_doc_conversion(tmpdir):
     tmppath = tmpdir.strpath
     subID = "Phoenix"
-    args = (
-        "-c dcm2niix -o %s -b -f bids_PhoenixReport --files %s -s %s"
-        % (tmpdir, pjoin(TESTS_DATA_PATH, "Phoenix"), subID)
-    ).split(" ")
+    args = [
+        "-c",
+        "dcm2niix",
+        "-o",
+        str(tmpdir),
+        "-b",
+        "-f",
+        "bids_PhoenixReport",
+        "--files",
+        pjoin(TESTS_DATA_PATH, "Phoenix"),
+        "-s",
+        subID,
+    ]
     runner(args)
 
     # check that the Phoenix document has been extracted (as gzipped dicom) in
diff --git a/heudiconv/tests/test_main.py b/heudiconv/tests/test_main.py
index 20fbaae3..aed739ba 100644
--- a/heudiconv/tests/test_main.py
+++ b/heudiconv/tests/test_main.py
@@ -282,9 +282,16 @@ def test_make_readonly(tmpdir):
 
 def test_cache(tmpdir):
     tmppath = tmpdir.strpath
-    args = ("-f convertall --files %s/axasc35.dcm -s S01" % (TESTS_DATA_PATH)).split(
-        " "
-    ) + ["-o", tmppath]
+    args = [
+        "-f",
+        "convertall",
+        "--files",
+        f"{TESTS_DATA_PATH}/axasc35.dcm",
+        "-s",
+        "S01",
+        "-o",
+        tmppath,
+    ]
     runner(args)
 
     cachedir = tmpdir / ".heudiconv" / "S01" / "info"
diff --git a/heudiconv/tests/test_queue.py b/heudiconv/tests/test_queue.py
index ee45de8c..264fabb0 100644
--- a/heudiconv/tests/test_queue.py
+++ b/heudiconv/tests/test_queue.py
@@ -10,17 +10,16 @@
 
 @pytest.mark.skipif(bool(which("sbatch")), reason="skip a real slurm call")
 @pytest.mark.parametrize(
-    "invocation",
+    "hargs",
     [
-        "--files %s/01-fmap_acq-3mm"
-        % TESTS_DATA_PATH,  # our new way with automated grouping
-        "-d %s/{subject}/* -s 01-fmap_acq-3mm"
-        % TESTS_DATA_PATH,  # "old" way specifying subject
+        # our new way with automated grouping
+        ["--files", f"{TESTS_DATA_PATH}/01-fmap_acq-3mm"],
+        # "old" way specifying subject
+        ["-d", f"{TESTS_DATA_PATH}/{{subject}}/*", "-s", "01-fmap_acq-3mm"],
     ],
 )
-def test_queue_no_slurm(tmpdir, invocation):
+def test_queue_no_slurm(tmpdir, hargs):
     tmpdir.chdir()
-    hargs = invocation.split(" ")
     hargs.extend(["-f", "reproin", "-b", "--minmeta", "--queue", "SLURM"])
 
     # simulate command-line call