Skip to content

Commit

Permalink
fix: properly resolve wildcards in group components (#2620)
Browse files Browse the repository at this point in the history
### QC
<!-- Make sure that you can tick the boxes below. -->

* [x] The PR contains a test case for the changes or the changes are
already covered by an existing test case.
* [ ] The documentation (`docs/`) is updated to reflect the changes or
this is not necessary (e.g. if the change does neither modify the
language nor the behavior or functionalities of Snakemake).
  • Loading branch information
johanneskoester committed Jan 15, 2024
1 parent daa6d9f commit c788a46
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 3 deletions.
7 changes: 7 additions & 0 deletions docs/executing/grouping.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,10 @@ This makes it possible to define batches of jobs of the same kind that shall be
means that given ``n`` jobs spawned from rule ``somerule``, Snakemake will create ``n / 5`` groups which each execute 5 jobs of ``somerule`` together.
For example, with 10 jobs from ``somerule`` you would end up with 2 groups of 5 jobs that are submitted as one piece each.

Furthermore, it is possible to use wildcards in group names.
This way, you can e.g. have a group per sample, e.g.:

.. code-block:: bash
snakemake --groups somerule=group_{sample} --group-components group_{sample}=5
5 changes: 4 additions & 1 deletion snakemake/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -1380,7 +1380,10 @@ def _update_group_components(self):
for group in self._group.values():
groups_by_id[group.groupid].add(group)
for groupid, conn_components in groups_by_id.items():
n_components = self.workflow.group_settings.group_components.get(groupid, 1)
lookup_id = self.workflow.parent_groupids.get(groupid, groupid)
n_components = self.workflow.group_settings.group_components.get(
lookup_id, 1
)
if n_components > 1:
for chunk in group_into_chunks(n_components, conn_components):
if len(chunk) > 1:
Expand Down
2 changes: 1 addition & 1 deletion snakemake/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ async def retrieve_from_storage(self):
if self.is_storage:
if not self.should_not_be_retrieved_from_storage:
mtime = await self.mtime()
if not await self.exists_local() or mtime_local < mtime.storage():
if not await self.exists_local() or mtime.local() < mtime.storage():
logger.info(f"Retrieving from storage: {self.storage_object.query}")
await self.storage_object.managed_retrieve()
logger.info("Finished retrieval.")
Expand Down
5 changes: 4 additions & 1 deletion snakemake/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,10 @@ def expand_group(self, wildcards):
item, _ = self.apply_input_function(self.group, wildcards)
return item
elif isinstance(self.group, str):
return apply_wildcards(self.group, wildcards)
resolved = apply_wildcards(self.group, wildcards)
if resolved != self.group:
self.workflow.parent_groupids[resolved] = self.group
return resolved
else:
return self.group

Expand Down
5 changes: 5 additions & 0 deletions snakemake/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ def __post_init__(self):
self._onerror = lambda log: None
self._onstart = lambda log: None
self._rulecount = 0
self._parent_groupids = dict()
self.global_container_img = None
self.global_is_containerized = False
self.configfiles = list(self.config_settings.configfiles)
Expand Down Expand Up @@ -213,6 +214,10 @@ def __post_init__(self):

self.globals["config"] = copy.deepcopy(self.config_settings.overwrite_config)

@property
def parent_groupids(self):
return self._parent_groupids

def tear_down(self):
for conda_env in self.injected_conda_envs:
conda_env.deactivate()
Expand Down

0 comments on commit c788a46

Please sign in to comment.