nextstrain · tsibley · May 3, 2022 · Mar 11, 2022
diff --git a/workflow/snakemake_rules/common.smk b/workflow/snakemake_rules/common.smk
@@ -2,8 +2,24 @@
 """
 import datetime
 from itertools import product
+from shlex import (
+    quote as shquote,       # shquote() is used in this file and also other workflow files
+    split as shsplitwords,
+)
 from urllib.parse import urlsplit
 
+def shquotewords(s: str) -> str:
+    """
+    Split string *s* into (POSIX) shell words, quote each word, and join them
+    back into a string.
+
+    This is suitable for properly quoting multi-word, user-defined values which
+    should follow shell quoting and escaping semantics (e.g. to allow spaces in
+    single words) but not allow shell features like variable interpolation,
+    command substitution, redirection, piping, etc.
+    """
+    return " ".join(shquote(word) for word in shsplitwords(s))
+
 def numeric_date(dt=None):
     """
     Convert datetime object to the numeric date.
@@ -74,15 +90,12 @@ def _get_filter_min_length_query(wildcards):
             # as recommended by pandas:
             #
             # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
-            #
-            # We escape the backticks with backslashes to prevent the bash shell
-            # from expanding the contents between the backticks as a subprocess.
-            query_parts.append(f"(\`{input_name}\` == 'yes' & _length >= {min_length})")
+            query_parts.append(f"(`{input_name}` == 'yes' & _length >= {min_length})")
         else:
             query_parts.append(f"(_length >= {min_length})")
 
     query = " | ".join(query_parts)
-    return f"--query \"{query}\""
+    return f"--query {shquote(query)}"
 
 def _get_filter_value(wildcards, key):
     for input_name in config["inputs"].keys():

diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk
@@ -160,9 +160,9 @@ def get_priority_argument(wildcards):
         return ""
 
     if subsampling_settings["priorities"]["type"] == "proximity":
-        return "--priority " + get_priorities(wildcards)
+        return "--priority " + shquote(get_priorities(wildcards))
     elif subsampling_settings["priorities"]["type"] == "file" and "file" in subsampling_settings["priorities"]:
-        return "--priority " + subsampling_settings["priorities"]["file"]
+        return "--priority " + shquote(subsampling_settings["priorities"]["file"])
     else:
         return ""
 
@@ -199,15 +199,15 @@ def _get_specific_subsampling_setting(setting, optional=False):
             elif setting == 'max_sequences':
                 value = f"--subsample-max-sequences {value}"
 
-            return value
+            return shquotewords(value)
         else:
             value = ""
 
         # Check format strings that haven't been resolved.
         if re.search(r'\{.+\}', value):
             raise Exception(f"The parameters for the subsampling scheme '{wildcards.subsample}' of build '{wildcards.build_name}' reference build attributes that are not defined in the configuration file: '{value}'. Add these build attributes to the appropriate configuration file and try again.")
 
-        return value
+        return shquotewords(value)
 
     return _get_setting