Skip to content
Permalink
Browse files
feat: Allow paramspace to separate filename params with custom separa…
…tor (#1299)

* Allow paramspace to separate filename params with custom separator

* Add test for 'filename_sep'

* Add missing files

* Use filename separator in tests which hopefully works better on windows

Co-authored-by: Johannes Köster <johannes.koester@tu-dortmund.de>
  • Loading branch information
kpj and johanneskoester committed Apr 19, 2022
1 parent fe50b5c commit 8236e80794d0f9c9670238ba168770c0947e8379
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 12 deletions.
@@ -608,7 +608,7 @@ class Paramspace:
By default, a directory structure with one folder level per parameter is created
(e.g. column1~{column1}/column2~{column2}/***).
The exact behavior can be tweaked with two parameters:
The exact behavior can be tweaked with three parameters:
- ``filename_params`` takes a list of column names of the passed dataframe.
These names are used to build the filename (separated by ``filename_sep``, '_' by default) in the order
@@ -622,16 +622,26 @@ class Paramspace:
If ``filename_params="*"``, all columns of the dataframe are encoded into
the filename instead of parent directories.
- ``param_sep`` takes a string that is used to join the column name and
- ``param_sep`` takes a string which is used to join the column name and
column value in the generated paths (Default: '~'). Example:
| ``Paramspace(df, param_sep=":")`` ->
| column1:{value1}/column2:{value2}/column3:{value3}/column4:{value4}
- ``filename_sep`` takes a string which is used to join the parameter
entries listed in ``filename_params`` in the generated paths
(Default: '_'). Example:
| ``Paramspace(df, filename_params="*", filename_sep="-")`` ->
| column1~{value1}-column2~{value2}-column3~{value3}-column4~{value4}
"""

def __init__(self, dataframe, filename_params=None, param_sep="~"):
def __init__(
self, dataframe, filename_params=None, param_sep="~", filename_sep="_"
):
self.dataframe = dataframe
self.param_sep = param_sep
self.filename_sep = filename_sep
if filename_params is None or not filename_params:
# create a pattern of the form {}/{}/{} with one entry for each
# column in the dataframe
@@ -653,7 +663,7 @@ def __init__(self, dataframe, filename_params=None, param_sep="~"):
self.pattern = "/".join(
[r"{}"] * (len(self.dataframe.columns) - len(filename_params) + 1)
)
self.pattern = "_".join(
self.pattern = self.filename_sep.join(
[self.pattern] + [r"{}"] * (len(filename_params) - 1)
)
self.ordered_columns = [
@@ -2,34 +2,41 @@ from snakemake.utils import Paramspace
import pandas as pd


# shold result in alpha~{alpha}/beta~{beta}/gamma~{gamma}
# should result in alpha~{alpha}/beta~{beta}/gamma~{gamma}
paramspace_default = Paramspace(pd.read_csv("params.tsv", sep="\t"))

# shold result in alpha~{alpha}/beta~{beta}/gamma~{gamma}
# should result in alpha~{alpha}/beta~{beta}/gamma~{gamma}
paramspace_empty = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=[])

# shold result in alpha~{alpha}/gamma~{gamma}/beta~{beta}
# should result in alpha~{alpha}/gamma~{gamma}/beta~{beta}
paramspace_one = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta"])

# shold result in alpha~{alpha}/beta~{beta}_gamma~{gamma}
# should result in alpha~{alpha}/beta~{beta}_gamma~{gamma}
paramspace_two = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta", "gamma"])

# shold result in alpha~{alpha}_beta~{beta}_gamma~{gamma}
# should result in alpha~{alpha}_beta~{beta}_gamma~{gamma}
paramspace_full = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["alpha", "beta", "gamma"])

# shold result in beta~{beta}_gamma~{gamma}_alpha~{alpha}
# should result in beta~{beta}_gamma~{gamma}_alpha~{alpha}
paramspace_full_reorder = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta", "gamma", "alpha"])

# shold result in alpha:{alpha}/beta:{beta}/gamma:{gamma}
# should result in alpha_is_{alpha}/beta_is_{beta}/gamma_is_{gamma}
paramspace_sep = Paramspace(pd.read_csv("params.tsv", sep="\t"), param_sep="_is_")

# shold result in beta={beta}_gamma={gamma}_alpha={alpha}
# should result in beta={beta}_gamma={gamma}_alpha={alpha}
paramspace_sep_and_pattern = Paramspace(
pd.read_csv("params.tsv", sep="\t"),
filename_params=["beta", "gamma", "alpha"],
param_sep="=",
)

# should result in alpha~{alpha}__beta~{beta}__gamma~{gamma}
paramspace_filenamesep = Paramspace(
pd.read_csv("params.tsv", sep="\t"),
filename_params="*",
filename_sep="__",
)


rule all:
input:
@@ -41,6 +48,7 @@ rule all:
expand("results/full_reorder/plots/{params}.pdf", params=paramspace_full_reorder.instance_patterns),
expand("results/sep/plots/{params}.pdf", params=paramspace_sep.instance_patterns),
expand("results/sep_and_pattern/plots/{params}.pdf", params=paramspace_sep_and_pattern.instance_patterns),
expand("results/filenamesep/plots/{params}.pdf", params=paramspace_filenamesep.instance_patterns),


rule simulate_default:
@@ -185,3 +193,21 @@ rule plot_sep_and_pattern:
f"results/sep_and_pattern/plots/{paramspace_sep_and_pattern.wildcard_pattern}.pdf"
shell:
"touch {output}"


# Simulation step for the paramspace built with filename_params="*" and
# filename_sep="__".
# The output path embeds paramspace_filenamesep.wildcard_pattern, and
# paramspace_filenamesep.instance is handed to scripts/simulate.py as
# params.simulation.
rule simulate_filenamesep:
output:
f"results/filenamesep/simulations/{paramspace_filenamesep.wildcard_pattern}.tsv"
params:
simulation=paramspace_filenamesep.instance
script:
"scripts/simulate.py"


# Plot step for the filename_sep paramspace: declares the simulation TSV as
# input and a PDF with the same wildcard pattern as output.
# The shell command only touches the output file; it does not read the input.
rule plot_filenamesep:
input:
f"results/filenamesep/simulations/{paramspace_filenamesep.wildcard_pattern}.tsv"
output:
f"results/filenamesep/plots/{paramspace_filenamesep.wildcard_pattern}.pdf"
shell:
"touch {output}"
@@ -0,0 +1 @@
{'alpha': 1.0, 'beta': 0.1, 'gamma': 0.99}
@@ -0,0 +1 @@
{'alpha': 2.0, 'beta': 0.0, 'gamma': 3.9}

0 comments on commit 8236e80

Please sign in to comment.