fix: properly resolve wildcards in group components (#2620)

### QC  * [x] The PR contains a test case for the changes or the changes are already covered by an existing test case. * [ ] The documentation (`docs/`) is updated to reflect the changes or this is not necessary (e.g. if the change does neither modify the language nor the behavior or functionalities of Snakemake).
snakemake · Jan 15, 2024 · c788a46 · c788a46
1 parent daa6d9f
commit c788a46
Show file tree

Hide file tree

Showing 5 changed files with 21 additions and 3 deletions.
diff --git a/docs/executing/grouping.rst b/docs/executing/grouping.rst
@@ -38,3 +38,10 @@ This makes it possible to define batches of jobs of the same kind that shall be
 
 means that given ``n`` jobs spawned from rule ``somerule``, Snakemake will create ``n / 5`` groups which each execute 5 jobs of ``somerule`` together.
 For example, with 10 jobs from ``somerule`` you would end up with 2 groups of 5 jobs that are submitted as one piece each.
+
+Furthermore, it is possible to use wildcards in group names.
+This way, you can e.g. have a group per sample, e.g.:
+
+.. code-block:: bash
+
+    snakemake --groups somerule=group_{sample} --group-components group_{sample}=5
diff --git a/snakemake/dag.py b/snakemake/dag.py
@@ -1380,7 +1380,10 @@ def _update_group_components(self):
         for group in self._group.values():
             groups_by_id[group.groupid].add(group)
         for groupid, conn_components in groups_by_id.items():
-            n_components = self.workflow.group_settings.group_components.get(groupid, 1)
+            lookup_id = self.workflow.parent_groupids.get(groupid, groupid)
+            n_components = self.workflow.group_settings.group_components.get(
+                lookup_id, 1
+            )
             if n_components > 1:
                 for chunk in group_into_chunks(n_components, conn_components):
                     if len(chunk) > 1:

diff --git a/snakemake/io.py b/snakemake/io.py
@@ -642,7 +642,7 @@ async def retrieve_from_storage(self):
         if self.is_storage:
             if not self.should_not_be_retrieved_from_storage:
                 mtime = await self.mtime()
-                if not await self.exists_local() or mtime_local < mtime.storage():
+                if not await self.exists_local() or mtime.local() < mtime.storage():
                     logger.info(f"Retrieving from storage: {self.storage_object.query}")
                     await self.storage_object.managed_retrieve()
                     logger.info("Finished retrieval.")

diff --git a/snakemake/rules.py b/snakemake/rules.py
@@ -1012,7 +1012,10 @@ def expand_group(self, wildcards):
             item, _ = self.apply_input_function(self.group, wildcards)
             return item
         elif isinstance(self.group, str):
-            return apply_wildcards(self.group, wildcards)
+            resolved = apply_wildcards(self.group, wildcards)
+            if resolved != self.group:
+                self.workflow.parent_groupids[resolved] = self.group
+            return resolved
         else:
             return self.group
 

diff --git a/snakemake/workflow.py b/snakemake/workflow.py
@@ -172,6 +172,7 @@ def __post_init__(self):
         self._onerror = lambda log: None
         self._onstart = lambda log: None
         self._rulecount = 0
+        self._parent_groupids = dict()
         self.global_container_img = None
         self.global_is_containerized = False
         self.configfiles = list(self.config_settings.configfiles)
@@ -213,6 +214,10 @@ def __post_init__(self):
 
         self.globals["config"] = copy.deepcopy(self.config_settings.overwrite_config)
 
+    @property
+    def parent_groupids(self):
+        return self._parent_groupids
+
     def tear_down(self):
         for conda_env in self.injected_conda_envs:
             conda_env.deactivate()